Code
library (tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (haven) # importing SPSS data files
library (furniture) # nice tables of descriptives
library (corrplot) # visualize correlations
## corrplot 0.92 loaded
library (ggplot2) # plot making
library (GGally) # extensions to ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library (sjlabelled) # work with SPSS data
##
## Attaching package: 'sjlabelled'
##
## The following objects are masked from 'package:haven':
##
## as_factor, read_sas, read_spss, read_stata, write_sas, zap_labels
##
## The following object is masked from 'package:forcats':
##
## as_factor
##
## The following object is masked from 'package:dplyr':
##
## as_label
##
## The following object is masked from 'package:ggplot2':
##
## as_label
library (lubridate) # dates
library (lme4) # mixed models
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library (table1) # for producing t1
## Registered S3 methods overwritten by 'table1':
## method from
## as.data.frame.table1 furniture
## print.table1 furniture
##
## Attaching package: 'table1'
##
## The following object is masked from 'package:furniture':
##
## table1
##
## The following objects are masked from 'package:base':
##
## units, units<-
library (sjPlot) # for model outputs
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
library (car) # to calculate VIF
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library (broom) # for function augment
library (lmtest) # assumption tests
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library (sandwich) # heteroskedasticity
library (olsrr) # outlier analysis
##
## Attaching package: 'olsrr'
##
## The following object is masked from 'package:datasets':
##
## rivers
library (robustbase) # robust regression
setwd ("~/Amber/FINDEM" )
Data
We load and scale the data.
Code
data_clean <- read_sav ("~/Amber/Data/FINDEM/Final_survey.sav" )
data_clean <- data_clean %>% dplyr:: select (- Einnummer, - Cluster, - Date, - Meetweek) %>%
mutate (Gender = factor (Gender) %>% fct_recode ("Male" = "1" , "Female" = "2" )) %>%
mutate (PICA = factor (PICA) %>% fct_recode ("Yes" = "1" , "No" = "0" )) %>%
mutate (RLS = factor (case_when (RLS == 2 ~ 1 , RLS == 1 ~ 0 )) %>% fct_recode ("Yes" = "1" , "No" = "0" )) %>%
mutate (Intervention_months_factor = factor (case_when (Intervention_months_s < 6 ~ 0 , Intervention_months_s >= 6 & Intervention_months_s < 12 ~ 1 , Intervention_months_s >= 12 & Intervention_months_s < 18 ~ 2 , Intervention_months_s >= 18 & Intervention_months_s < 24 ~ 3 ,Intervention_months_s >= 24 ~ 4 ))) %>%
mutate (CalendarTime = interval (as.Date ('2017-11-01' ), Timepoint)%/% months (1 ))%>%
mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" )) %>%
mutate (Warmglow = as.numeric (Warmglow)) %>% mutate (Warmglow = as.factor (case_when (Warmglow >= 6 ~ 1 , TRUE & ! is.na (Warmglow) ~ 0 )) %>% fct_recode ("Yes" = "1" , "No" = "0" ), StartDate = substr (StartDate, 1 , 10 ), StartDate = as.Date (StartDate, "%Y-%m-%d" ), Meetweek = case_when (StartDate < "2019-11-01" ~ 2 , StartDate > "2019-11-01" ~ 3 ))
data_scale <- data_clean
data_scale$ Age <- scale (data_scale$ Age, center= TRUE , scale = FALSE )
data_scale$ Weight <- scale (data_scale$ Weight, center= TRUE , scale = FALSE )
data_scale$ Height <- scale (data_scale$ Height, center= TRUE , scale = FALSE )
Descriptives
We present a descriptives table for the outcomes we will analyse, including:
PICA
Restless legs syndrome (RLS)
Cognitive functioning (CFQ)
Physical health (SF_ph)
Mental health (SF_mh)
Warm glow
Fatigue (CIS)
Code
#| label: t1 premenopausal females
data_t1 <- data_clean %>% mutate (stap2 = as.factor (stap), stap2 = recode_factor (stap2, "2.1" = "2" ,"2.2" = "2" , "3.1" = "3" , "3.2" = "3" ), stap2 = factor (stap2,levels = c ("1" , "2" , "3" , "4" ))) %>% filter (Meetweek == 2 ) %>% mutate (Hb = replace (Hb, Hb == 999 , NA ), Hb = Hb/ 0.6206 )
prefemalest1<- table1:: table1 (~ Age + Hb + Ferritin + factor (PICA) + factor (RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor (stap2), data = subset (data_t1, data_t1$ Gender== "Female" & data_t1$ PostMeno== 0 ), caption = "Premenopausal females" , render.continuous= c (.= "Mean (SD)" , .= "Median [Q1, Q3]" ), render.categorical= c (.= "FREQ (PCT)" ))
prefemalest1
Premenopausal females
Age
Mean (SD)
28.4 (7.61)
28.1 (7.75)
27.3 (7.29)
27.5 (7.75)
27.9 (7.59)
Median [Q1, Q3]
27.0 [22.0, 34.0]
26.0 [22.0, 33.0]
25.0 [21.0, 32.5]
25.0 [21.8, 33.0]
26.0 [22.0, 33.0]
Hb
Mean (SD)
13.5 (0.805)
13.5 (0.775)
13.6 (0.821)
13.5 (0.779)
13.5 (0.797)
Median [Q1, Q3]
13.4 [12.9, 14.0]
13.4 [12.9, 13.9]
13.5 [13.0, 14.2]
13.5 [13.1, 14.0]
13.4 [12.9, 14.0]
Missing
0 (0%)
1 (0.3%)
0 (0%)
1 (0.5%)
2 (0.2%)
Ferritin
Mean (SD)
29.7 (25.0)
29.2 (25.5)
29.3 (35.3)
28.4 (29.4)
29.2 (29.0)
Median [Q1, Q3]
22.7 [12.1, 40.0]
22.6 [11.5, 38.2]
19.9 [10.1, 39.0]
20.8 [11.3, 34.9]
21.6 [11.3, 38.5]
Missing
7 (2.3%)
12 (3.9%)
15 (4.9%)
16 (7.5%)
50 (4.4%)
factor(PICA)
No
283 (94.0)
296 (95.5)
299 (97.4)
197 (92.9)
1075 (95.1)
Yes
5 (1.7)
4 (1.3)
3 (1.0)
10 (4.7)
22 (1.9)
Missing
13 (4.3%)
10 (3.2%)
5 (1.6%)
5 (2.4%)
33 (2.9%)
factor(RLS)
No
237 (78.7)
249 (80.3)
231 (75.2)
164 (77.4)
881 (78.0)
Yes
0 (0)
2 (0.6)
1 (0.3)
3 (1.4)
6 (0.5)
Missing
64 (21.3%)
59 (19.0%)
75 (24.4%)
45 (21.2%)
243 (21.5%)
CFQ_Total
Mean (SD)
30.8 (13.5)
29.3 (11.9)
31.0 (11.9)
32.2 (13.8)
30.7 (12.7)
Median [Q1, Q3]
30.0 [21.5, 37.5]
29.0 [21.0, 38.0]
30.0 [23.0, 38.0]
31.0 [22.3, 41.0]
30.0 [22.0, 38.5]
Missing
14 (4.7%)
10 (3.2%)
5 (1.6%)
6 (2.8%)
35 (3.1%)
SF_ph
Mean (SD)
89.7 (10.2)
90.6 (9.40)
90.3 (9.51)
89.5 (9.54)
90.1 (9.68)
Median [Q1, Q3]
92.5 [86.2, 96.3]
93.1 [87.5, 96.3]
92.5 [87.5, 96.3]
92.4 [85.0, 96.2]
92.5 [86.9, 96.3]
Missing
6 (2.0%)
3 (1.0%)
2 (0.7%)
4 (1.9%)
15 (1.3%)
SF_mh
Mean (SD)
77.4 (13.7)
77.4 (14.3)
77.4 (14.7)
75.8 (15.9)
77.1 (14.6)
Median [Q1, Q3]
82.0 [73.5, 86.5]
82.0 [74.4, 86.3]
82.8 [75.1, 86.5]
80.9 [72.3, 86.3]
82.1 [73.8, 86.5]
Missing
3 (1.0%)
3 (1.0%)
1 (0.3%)
3 (1.4%)
10 (0.9%)
Warmglow
No
119 (39.5)
130 (41.9)
134 (43.6)
109 (51.4)
492 (43.5)
Yes
162 (53.8)
166 (53.5)
163 (53.1)
91 (42.9)
582 (51.5)
Missing
20 (6.6%)
14 (4.5%)
10 (3.3%)
12 (5.7%)
56 (5.0%)
CIS_Totalmean
Mean (SD)
82.9 (6.41)
82.5 (6.05)
82.7 (6.13)
81.9 (5.76)
82.5 (6.12)
Median [Q1, Q3]
84.0 [80.0, 87.0]
83.0 [79.0, 87.0]
84.0 [79.0, 87.0]
83.0 [78.0, 86.0]
83.0 [79.0, 87.0]
Missing
12 (4.0%)
7 (2.3%)
3 (1.0%)
4 (1.9%)
26 (2.3%)
Code
postfemalest1<- table1:: table1 (~ Age + Hb + Ferritin + factor (PICA) + factor (RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor (stap2), data = subset (data_t1, data_t1$ Gender== "Female" & data_t1$ PostMeno== 1 ), caption = "Postmenopausal females" , render.continuous= c (.= "Mean (SD)" , .= "Median [Q1, Q3]" ), render.categorical= c (.= "FREQ (PCT)" ))
postfemalest1
Postmenopausal females
Age
Mean (SD)
55.3 (6.90)
56.0 (6.80)
55.3 (6.42)
55.8 (7.05)
55.6 (6.76)
Median [Q1, Q3]
54.0 [50.0, 61.0]
56.0 [50.0, 61.0]
55.0 [50.0, 60.0]
55.0 [50.0, 61.8]
55.0 [50.0, 61.0]
Hb
Mean (SD)
13.7 (0.856)
13.7 (0.825)
13.7 (0.848)
13.7 (0.822)
13.7 (0.838)
Median [Q1, Q3]
13.5 [13.1, 14.2]
13.7 [13.1, 14.2]
13.5 [13.1, 14.3]
13.5 [13.1, 14.0]
13.5 [13.1, 14.2]
Ferritin
Mean (SD)
34.9 (28.6)
32.2 (24.6)
29.9 (26.7)
31.1 (25.3)
32.0 (26.5)
Median [Q1, Q3]
29.3 [16.3, 41.5]
25.6 [15.0, 44.7]
22.4 [10.4, 39.2]
26.3 [15.0, 41.5]
26.1 [14.1, 41.5]
Missing
5 (2.5%)
5 (2.6%)
8 (3.5%)
3 (2.1%)
21 (2.7%)
factor(PICA)
No
196 (97.5)
192 (98.5)
218 (96.0)
139 (97.9)
745 (97.4)
Yes
4 (2.0)
1 (0.5)
4 (1.8)
2 (1.4)
11 (1.4)
Missing
1 (0.5%)
2 (1.0%)
5 (2.2%)
1 (0.7%)
9 (1.2%)
factor(RLS)
No
158 (78.6)
147 (75.4)
166 (73.1)
105 (73.9)
576 (75.3)
Yes
0 (0)
4 (2.1)
7 (3.1)
3 (2.1)
14 (1.8)
Missing
43 (21.4%)
44 (22.6%)
54 (23.8%)
34 (23.9%)
175 (22.9%)
CFQ_Total
Mean (SD)
26.4 (12.2)
25.3 (10.9)
25.0 (9.84)
26.0 (12.6)
25.6 (11.3)
Median [Q1, Q3]
26.0 [18.0, 34.0]
24.0 [17.0, 32.0]
25.0 [18.0, 32.0]
25.0 [16.0, 34.0]
25.0 [17.0, 33.0]
Missing
1 (0.5%)
2 (1.0%)
5 (2.2%)
1 (0.7%)
9 (1.2%)
SF_ph
Mean (SD)
87.4 (11.7)
87.8 (12.6)
88.0 (12.1)
88.5 (9.62)
87.9 (11.7)
Median [Q1, Q3]
89.9 [83.7, 96.2]
92.4 [84.9, 96.3]
91.3 [84.4, 96.3]
90.0 [85.6, 94.9]
91.2 [84.9, 95.0]
Missing
1 (0.5%)
2 (1.0%)
4 (1.8%)
1 (0.7%)
8 (1.0%)
SF_mh
Mean (SD)
81.4 (13.0)
81.6 (11.3)
81.2 (12.2)
81.3 (11.9)
81.4 (12.1)
Median [Q1, Q3]
85.4 [78.4, 88.8]
84.3 [79.0, 88.8]
84.6 [79.0, 87.8]
84.0 [80.3, 88.1]
84.7 [78.9, 88.5]
Missing
1 (0.5%)
1 (0.5%)
3 (1.3%)
0 (0%)
5 (0.7%)
Warmglow
No
76 (37.8)
57 (29.2)
70 (30.8)
48 (33.8)
251 (32.8)
Yes
118 (58.7)
129 (66.2)
148 (65.2)
92 (64.8)
487 (63.7)
Missing
7 (3.5%)
9 (4.6%)
9 (4.0%)
2 (1.4%)
27 (3.5%)
CIS_Totalmean
Mean (SD)
86.1 (6.12)
85.8 (7.03)
86.5 (6.44)
85.1 (5.72)
85.9 (6.39)
Median [Q1, Q3]
86.0 [84.0, 90.0]
86.0 [82.0, 89.0]
87.0 [83.2, 89.0]
85.1 [82.0, 89.0]
86.0 [83.0, 89.0]
Missing
1 (0.5%)
2 (1.0%)
5 (2.2%)
0 (0%)
8 (1.0%)
Code
malest1<- table1:: table1 (~ Age + Hb + Ferritin + factor (PICA) + factor (RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor (stap2), data = subset (data_t1, data_t1$ Gender== "Male" ), caption = "Males" , render.continuous= c (.= "Mean (SD)" , .= "Median [Q1, Q3]" ), render.categorical= c (.= "FREQ (PCT)" ))
malest1
Males
Age
Mean (SD)
46.0 (15.2)
45.6 (15.2)
45.4 (15.2)
47.8 (15.4)
46.1 (15.2)
Median [Q1, Q3]
48.0 [32.0, 59.0]
48.0 [31.5, 59.0]
48.0 [32.0, 58.0]
51.0 [34.0, 61.0]
49.0 [32.0, 59.0]
Hb
Mean (SD)
15.0 (1.00)
15.0 (0.961)
14.9 (0.956)
14.8 (0.932)
14.9 (0.966)
Median [Q1, Q3]
15.0 [14.2, 15.6]
14.8 [14.3, 15.5]
14.7 [14.2, 15.6]
14.8 [14.0, 15.5]
14.8 [14.2, 15.6]
Missing
0 (0%)
2 (0.5%)
1 (0.2%)
0 (0%)
3 (0.2%)
Ferritin
Mean (SD)
52.9 (59.7)
43.1 (42.9)
40.7 (47.5)
37.1 (41.8)
43.2 (48.5)
Median [Q1, Q3]
35.3 [21.2, 61.7]
29.8 [15.1, 53.9]
26.7 [12.9, 46.2]
23.0 [12.2, 43.4]
28.4 [14.7, 50.7]
Missing
4 (1.2%)
9 (2.2%)
10 (2.0%)
13 (3.9%)
36 (2.3%)
factor(PICA)
No
333 (96.5)
393 (95.6)
471 (94.0)
321 (95.8)
1518 (95.4)
Yes
9 (2.6)
8 (1.9)
15 (3.0)
4 (1.2)
36 (2.3)
Missing
3 (0.9%)
10 (2.4%)
15 (3.0%)
10 (3.0%)
38 (2.4%)
factor(RLS)
No
290 (84.1)
341 (83.0)
403 (80.4)
278 (83.0)
1312 (82.4)
Yes
2 (0.6)
0 (0)
5 (1.0)
2 (0.6)
9 (0.6)
Missing
53 (15.4%)
70 (17.0%)
93 (18.6%)
55 (16.4%)
271 (17.0%)
CFQ_Total
Mean (SD)
25.2 (11.6)
23.9 (10.9)
24.5 (11.3)
23.4 (11.1)
24.3 (11.2)
Median [Q1, Q3]
25.0 [17.0, 32.0]
23.0 [17.0, 30.0]
24.0 [16.0, 31.0]
23.0 [17.0, 29.0]
24.0 [17.0, 31.0]
Missing
6 (1.7%)
10 (2.4%)
12 (2.4%)
13 (3.9%)
41 (2.6%)
SF_ph
Mean (SD)
90.6 (8.64)
91.3 (9.32)
91.7 (7.92)
90.6 (9.81)
91.1 (8.87)
Median [Q1, Q3]
92.5 [87.4, 96.3]
93.8 [88.8, 97.5]
93.8 [89.9, 96.3]
92.5 [87.5, 96.3]
93.7 [88.7, 96.3]
Missing
3 (0.9%)
6 (1.5%)
7 (1.4%)
5 (1.5%)
21 (1.3%)
SF_mh
Mean (SD)
82.1 (11.9)
83.3 (11.1)
82.2 (12.1)
83.5 (10.9)
82.7 (11.6)
Median [Q1, Q3]
85.4 [79.8, 89.0]
86.4 [80.9, 89.8]
85.5 [80.4, 88.8]
86.5 [81.0, 89.8]
85.8 [80.5, 89.0]
Missing
2 (0.6%)
5 (1.2%)
4 (0.8%)
5 (1.5%)
16 (1.0%)
Warmglow
No
119 (34.5)
123 (29.9)
164 (32.7)
110 (32.8)
516 (32.4)
Yes
211 (61.2)
272 (66.2)
323 (64.5)
207 (61.8)
1013 (63.6)
Missing
15 (4.3%)
16 (3.9%)
14 (2.8%)
18 (5.4%)
63 (4.0%)
CIS_Totalmean
Mean (SD)
85.1 (5.95)
86.1 (6.24)
85.3 (5.97)
85.8 (5.54)
85.6 (5.96)
Median [Q1, Q3]
85.3 [82.0, 88.0]
86.0 [82.1, 89.0]
86.0 [82.0, 89.0]
86.0 [83.0, 89.0]
86.0 [82.0, 89.0]
Missing
3 (0.9%)
9 (2.2%)
12 (2.4%)
9 (2.7%)
33 (2.1%)
Analysis
PICA
PICA is diagnosed when donor answers yes to: do you crave and regularly eat non-food items such as ice, clay, mud, sand, raw pasta, chalk or charcoal? (translated)
Models
Code
PICA_fitM <- glm (formula = PICA ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" ,family = binomial)
PICA_fit_preF <- glm (formula = PICA ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 ,family = binomial)
PICA_fit_postF <- glm (formula = PICA ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 ,family = binomial)
pl <- c (
` (Intercept) ` = "Intercept" ,
Intervention_months_factor0 = "0-5 months" ,
Intervention_months_factor1 = "6-11 months" ,
Intervention_months_factor2 = "12-17 months" ,
Intervention_months_factor3 = "18-23 months" ,
Intervention_months_factor4 = "24+ months"
)
tab_model (PICA_fit_preF, PICA_fit_postF, PICA_fitM, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "PICA" )
PICA
Pre-menopausal women
Post-menopausal women
Men
Predictors
Odds Ratios
CI
p
Odds Ratios
CI
p
Odds Ratios
CI
p
Intercept
0.00
0.00 – 0.02
<0.001
0.01
0.00 – 0.10
<0.001
0.01
0.00 – 0.04
<0.001
Age
0.93
0.88 – 0.97
0.001
0.99
0.93 – 1.05
0.691
1.00
0.98 – 1.01
0.646
Weight
1.03
1.00 – 1.05
0.019
1.02
0.99 – 1.06
0.135
1.02
1.01 – 1.04
0.007
Height
0.98
0.93 – 1.02
0.313
0.98
0.92 – 1.05
0.551
0.98
0.95 – 1.01
0.231
CalendarTime
1.04
0.96 – 1.13
0.285
1.03
0.92 – 1.16
0.576
1.04
0.98 – 1.11
0.156
6-11 months
0.99
0.45 – 2.07
0.989
0.50
0.08 – 1.97
0.381
0.64
0.30 – 1.24
0.217
12-17 months
0.77
0.30 – 1.72
0.548
1.68
0.60 – 4.44
0.298
1.00
0.55 – 1.72
0.986
18-23 months
1.04
0.38 – 2.55
0.940
1.38
0.20 – 6.13
0.699
0.92
0.39 – 1.95
0.843
24+ months
0.65
0.23 – 1.64
0.384
1.63
0.41 – 5.83
0.458
0.75
0.37 – 1.44
0.410
Observations
2345
1465
3545
R2 Tjur
0.010
0.004
0.003
Model assumptions
Premenopausal females:
Code
plot (PICA_fit_preF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (PICA_fit_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 14
.rownames PICA Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 3788 Yes -6.02 -11.2 -20.8 24
2 4453 Yes 1.98 -0.189 -13.8 24
3 7722 Yes -21.0 36.8 -1.76 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = PICA), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 4 influential data points
# A tibble: 6 × 14
.rownames PICA Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 1410 Yes -14.0 -16.2 -2.76 16
2 3642 Yes -11.0 -23.2 -8.76 24
3 4028 Yes -0.0154 -10.2 -3.76 24
4 4453 Yes 1.98 -0.189 -13.8 24
5 5306 Yes -13.0 -10.2 1.24 16
6 7651 Yes 0.985 -20.2 -14.8 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (PICA_fit_preF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (PICA_fit_preF,col= "red" ,id.n= 3 )
StudRes Hat CookD
3788 3.0298257 0.004006847 0.036997569
4453 3.1954801 0.002673640 0.040731782
5873 -0.5691024 0.069276280 0.001496743
7651 3.1769470 0.002234815 0.033165230
7722 2.1343591 0.039130585 0.034808526
Code
car:: influenceIndexPlot (PICA_fit_preF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 4453 , - 3788 , - 7722 ), ]
PICA_fit_preF_upd1 <- update (PICA_fit_preF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (PICA_fit_preF,PICA_fit_preF_upd1) # no improvement
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + CalendarTime +
Intervention_months_factor, family = binomial, data = data_scale, subset =
data_scale$Gender == "Female" & data_scale$PostMeno == 0)
2: glm(formula = PICA ~ Age + Weight + Height + CalendarTime +
Intervention_months_factor, family = binomial, data = data_scale2, subset =
data_scale2$Gender == "Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -5.689 -5.906
SE 0.925 0.947
Age -0.0775 -0.0891
SE 0.0227 0.0244
Weight 0.0255 0.0202
SE 0.0109 0.0117
Height -0.02339 -0.00926
SE 0.02320 0.02377
CalendarTime 0.0435 0.0472
SE 0.0407 0.0407
Intervention_months_factor1 -0.00537 -0.00428
SE 0.38728 0.38744
Intervention_months_factor2 -0.263 -0.252
SE 0.437 0.437
Intervention_months_factor3 0.0359 -0.1363
SE 0.4761 0.5009
Intervention_months_factor4 -0.434 -0.858
SE 0.498 0.576
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + CalendarTime + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.5580 -0.2449 -0.2022 -0.1555 3.1378
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.68866 0.92479 -6.151 7.68e-10 ***
Age -0.07750 0.02266 -3.420 0.000626 ***
Weight 0.02549 0.01091 2.336 0.019494 *
Height -0.02339 0.02320 -1.008 0.313303
CalendarTime 0.04351 0.04067 1.070 0.284669
Intervention_months_factor1 -0.00537 0.38728 -0.014 0.988937
Intervention_months_factor2 -0.26255 0.43661 -0.601 0.547618
Intervention_months_factor3 0.03591 0.47612 0.075 0.939884
Intervention_months_factor4 -0.43421 0.49828 -0.871 0.383533
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 514.02 on 2344 degrees of freedom
Residual deviance: 495.34 on 2336 degrees of freedom
(685 observations deleted due to missingness)
AIC: 513.34
Number of Fisher Scoring iterations: 7
Code
summary (PICA_fit_preF_upd1)
Call:
glm(formula = PICA ~ Age + Weight + Height + CalendarTime + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.5069 -0.2431 -0.1958 -0.1418 3.2055
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.906188 0.946543 -6.240 4.38e-10 ***
Age -0.089057 0.024446 -3.643 0.000269 ***
Weight 0.020185 0.011738 1.720 0.085487 .
Height -0.009261 0.023770 -0.390 0.696826
CalendarTime 0.047201 0.040715 1.159 0.246334
Intervention_months_factor1 -0.004283 0.387443 -0.011 0.991181
Intervention_months_factor2 -0.252337 0.436701 -0.578 0.563381
Intervention_months_factor3 -0.136338 0.500938 -0.272 0.785495
Intervention_months_factor4 -0.857965 0.576016 -1.489 0.136360
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 491.23 on 2341 degrees of freedom
Residual deviance: 470.75 on 2333 degrees of freedom
(685 observations deleted due to missingness)
AIC: 488.75
Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car:: vif (PICA_fit_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.057498 1 1.028347
Weight 1.161918 1 1.077923
Height 1.113346 1 1.055152
CalendarTime 1.338418 1 1.156900
Intervention_months_factor 1.339795 4 1.037241
Postmenopausal females:
Code
plot (PICA_fit_postF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (PICA_fit_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 14
.rownames PICA Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 1968 Yes 22.0 6.81 -6.76 16
2 3889 Yes 17.0 -19.2 -18.8 24
3 7354 Yes 25.0 -1.19 -3.76 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = PICA), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 4 influential data points
# A tibble: 3 × 14
.rownames PICA Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 1968 Yes 22.0 6.81 -6.76 16
2 6560 Yes 27.0 -9.19 -7.76 16
3 7354 Yes 25.0 -1.19 -3.76 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (PICA_fit_postF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (PICA_fit_postF,col= "red" ,id.n= 3 )
StudRes Hat CookD
1968 3.2278441 0.005342350 0.0771398714
2897 -0.4156449 0.059185081 0.0006481046
4404 -0.2809267 0.105347916 0.0005546764
7354 3.2353289 0.005178806 0.0767341575
Code
car:: influenceIndexPlot (PICA_fit_postF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 1968 , - 3889 , - 7354 ), ]
PICA_fit_postF_upd1 <- update (PICA_fit_postF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (PICA_fit_postF,PICA_fit_postF_upd1) # no improvement
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + CalendarTime +
Intervention_months_factor, family = binomial, data = data_scale, subset =
data_scale$Gender == "Female" & data_scale$PostMeno == 1)
2: glm(formula = PICA ~ Age + Weight + Height + CalendarTime +
Intervention_months_factor, family = binomial, data = data_scale2, subset =
data_scale2$Gender == "Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -4.730 -5.689
SE 1.262 0.925
Age -0.0122 -0.0775
SE 0.0308 0.0227
Weight 0.0245 0.0255
SE 0.0164 0.0109
Height -0.0197 -0.0234
SE 0.0331 0.0232
CalendarTime 0.0332 0.0435
SE 0.0594 0.0407
Intervention_months_factor1 -0.68980 -0.00537
SE 0.78782 0.38728
Intervention_months_factor2 0.520 -0.263
SE 0.499 0.437
Intervention_months_factor3 0.3220 0.0359
SE 0.8317 0.4761
Intervention_months_factor4 0.491 -0.434
SE 0.662 0.498
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + CalendarTime + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 1)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.4090 -0.2093 -0.1732 -0.1451 3.1273
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.72967 1.26207 -3.748 0.000179 ***
Age -0.01223 0.03083 -0.397 0.691486
Weight 0.02451 0.01641 1.494 0.135166
Height -0.01973 0.03307 -0.597 0.550730
CalendarTime 0.03323 0.05937 0.560 0.575668
Intervention_months_factor1 -0.68980 0.78782 -0.876 0.381264
Intervention_months_factor2 0.52008 0.49949 1.041 0.297773
Intervention_months_factor3 0.32198 0.83170 0.387 0.698652
Intervention_months_factor4 0.49095 0.66207 0.742 0.458364
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 253.11 on 1464 degrees of freedom
Residual deviance: 247.06 on 1456 degrees of freedom
(608 observations deleted due to missingness)
AIC: 265.06
Number of Fisher Scoring iterations: 7
Code
summary (PICA_fit_postF_upd1)
Call:
glm(formula = PICA ~ Age + Weight + Height + CalendarTime + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.5580 -0.2449 -0.2022 -0.1555 3.1378
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.68866 0.92479 -6.151 7.68e-10 ***
Age -0.07750 0.02266 -3.420 0.000626 ***
Weight 0.02549 0.01091 2.336 0.019494 *
Height -0.02339 0.02320 -1.008 0.313303
CalendarTime 0.04351 0.04067 1.070 0.284669
Intervention_months_factor1 -0.00537 0.38728 -0.014 0.988937
Intervention_months_factor2 -0.26255 0.43661 -0.601 0.547618
Intervention_months_factor3 0.03591 0.47612 0.075 0.939884
Intervention_months_factor4 -0.43421 0.49828 -0.871 0.383533
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 514.02 on 2344 degrees of freedom
Residual deviance: 495.34 on 2336 degrees of freedom
(685 observations deleted due to missingness)
AIC: 513.34
Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car:: vif (PICA_fit_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.038903 1 1.019266
Weight 1.163923 1 1.078853
Height 1.187553 1 1.089749
CalendarTime 1.360759 1 1.166516
Intervention_months_factor 1.376018 4 1.040706
Males:
Code
plot (PICA_fitM, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (PICA_fitM) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 14
.rownames PICA Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 897 Yes -9.02 66.8 23.2 16
2 6725 Yes -11.0 51.8 -2.76 24
3 8024 Yes 24.0 8.81 -24.8 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = PICA), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 4 influential data points
# A tibble: 0 × 14
# ℹ 14 variables: .rownames <chr>, PICA <fct>, Age <dbl[,1]>, Weight <dbl[,1]>,
# Height <dbl[,1]>, CalendarTime <dbl>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (PICA_fitM)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (PICA_fitM,col= "red" ,id.n= 3 )
StudRes Hat CookD
897 2.3905813 0.01617391 0.0268557549
1192 2.9285628 0.00308557 0.0223814278
1294 -0.4267263 0.07747280 0.0009218200
3057 -0.4666771 0.03104989 0.0004153313
5564 2.9332826 0.00235762 0.0176906112
6725 2.3039370 0.02017608 0.0271110695
Code
car:: influenceIndexPlot (PICA_fitM, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 6621 , - 1294 , - 7465 ), ]
PICA_fitM_upd1 <- update (PICA_fitM, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (PICA_fitM,PICA_fitM_upd1) # no improvement
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + CalendarTime +
Intervention_months_factor, family = binomial, data = data_scale, subset =
data_scale$Gender == "Male")
2: glm(formula = PICA ~ Age + Weight + Height + CalendarTime +
Intervention_months_factor, family = binomial, data = data_scale2, subset =
data_scale2$Gender == "Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -4.422 -5.689
SE 0.623 0.925
Age -0.00331 -0.07750
SE 0.00722 0.02266
Weight 0.02224 0.02549
SE 0.00819 0.01091
Height -0.0187 -0.0234
SE 0.0156 0.0232
CalendarTime 0.0421 0.0435
SE 0.0297 0.0407
Intervention_months_factor1 -0.43874 -0.00537
SE 0.35505 0.38728
Intervention_months_factor2 -0.00496 -0.26255
SE 0.28831 0.43661
Intervention_months_factor3 -0.0805 0.0359
SE 0.4069 0.4761
Intervention_months_factor4 -0.281 -0.434
SE 0.342 0.498
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + CalendarTime + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Male")
Deviance Residuals:
Min 1Q Median 3Q Max
-0.4722 -0.2491 -0.2250 -0.2031 2.9061
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.421581 0.622889 -7.099 1.26e-12 ***
Age -0.003312 0.007220 -0.459 0.64648
Weight 0.022237 0.008192 2.714 0.00664 **
Height -0.018700 0.015627 -1.197 0.23144
CalendarTime 0.042118 0.029682 1.419 0.15591
Intervention_months_factor1 -0.438736 0.355053 -1.236 0.21657
Intervention_months_factor2 -0.004960 0.288315 -0.017 0.98628
Intervention_months_factor3 -0.080466 0.406920 -0.198 0.84325
Intervention_months_factor4 -0.281371 0.341693 -0.823 0.41025
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 875.12 on 3544 degrees of freedom
Residual deviance: 864.94 on 3536 degrees of freedom
(671 observations deleted due to missingness)
AIC: 882.94
Number of Fisher Scoring iterations: 6
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + CalendarTime + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.5580 -0.2449 -0.2022 -0.1555 3.1378
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.68866 0.92479 -6.151 7.68e-10 ***
Age -0.07750 0.02266 -3.420 0.000626 ***
Weight 0.02549 0.01091 2.336 0.019494 *
Height -0.02339 0.02320 -1.008 0.313303
CalendarTime 0.04351 0.04067 1.070 0.284669
Intervention_months_factor1 -0.00537 0.38728 -0.014 0.988937
Intervention_months_factor2 -0.26255 0.43661 -0.601 0.547618
Intervention_months_factor3 0.03591 0.47612 0.075 0.939884
Intervention_months_factor4 -0.43421 0.49828 -0.871 0.383533
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 514.02 on 2344 degrees of freedom
Residual deviance: 495.34 on 2336 degrees of freedom
(685 observations deleted due to missingness)
AIC: 513.34
Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car:: vif (PICA_fitM) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.066016 1 1.032480
Weight 1.214039 1 1.101834
Height 1.199862 1 1.095382
CalendarTime 1.225191 1 1.106884
Intervention_months_factor 1.237677 4 1.027013
Restless legs syndrome
Restless legs syndrome (RLS) diagnose was based of Burchell & Alleen, 2008. Validation of the self-completed Cambridge-Hopkins questionnaire (CH-RLSq) for ascertainment of restless legs syndrome (RLS) in a population survey.
Predict by ferritin
Code
# Compute the analysis of variance
data_scale$ RLS <- as.numeric (data_scale$ RLS)
means <- round (aggregate (Ferritin ~ RLS, data_scale, mean), 1 )
res.aov <- aov (RLS ~ Fergroup, data = data_scale)
summary (res.aov)
Df Sum Sq Mean Sq F value Pr(>F)
Fergroup 2 0.1 0.05452 0.756 0.47
Residuals 6404 461.7 0.07210
1748 observations deleted due to missingness
Code
#fit model
fit1 <- lm (RLS ~ 1 + Fergroup, data = data_scale)
summary (fit1)
Call:
lm(formula = RLS ~ 1 + Fergroup, data = data_scale)
Residuals:
Min 1Q Median 3Q Max
-0.08338 -0.08338 -0.07836 -0.07216 0.92784
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.072165 0.006612 162.148 <2e-16 ***
FergroupFerritin 15-30 0.011214 0.009125 1.229 0.219
FergroupFerritin > 30 0.006200 0.008264 0.750 0.453
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.2685 on 6404 degrees of freedom
(1748 observations deleted due to missingness)
Multiple R-squared: 0.0002361, Adjusted R-squared: -7.613e-05
F-statistic: 0.7562 on 2 and 6404 DF, p-value: 0.4695
Code
data_scale <- data_scale %>% mutate (RLS = factor (case_when (RLS == 2 ~ 1 , RLS == 1 ~ 0 )) %>% fct_recode ("Yes" = "1" , "No" = "0" ))
# Barplot
data_scale_per2 <- data_scale %>%
group_by (Fergroup) %>%
count (RLS) %>%
mutate (
Percentage = round (proportions (n) * 100 , 1 ),
res = str_c (n, "(" , Percentage, ")%" )
)
ggplot (subset (data_scale_per2, ! is.na (RLS)), aes (Fergroup, Percentage, fill = RLS)) +
geom_col (position = "dodge" ) +
geom_text (aes (label = res), vjust = - 0.2 )+
theme_bw ()
Code
ggplot (data_scale , mapping = aes (x = Fergroup, fill = RLS)) +
geom_bar (position = "dodge" ) +
theme_bw ()
Code
# t-test + boxplot
res <- res <- t.test (Ferritin ~ RLS, data = data_scale, var.equal = TRUE )
res
Two Sample t-test
data: Ferritin by RLS
t = -0.68841, df = 6405, p-value = 0.4912
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
-6.026412 2.893888
sample estimates:
mean in group No mean in group Yes
40.56889 42.13515
Code
ggplot (subset (data_scale, ! is.na (RLS)), aes (x= RLS, y= Ferritin)) +
geom_boxplot () +
theme_bw () +
geom_text (x = 2.3 , y = 700 , label= paste ('Means:' , " " , means$ Ferritin[1 ], " " , means$ Ferritin[2 ], " \n " , 't(' ,res$ parameter, '), p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "RLS" , y= "Ferritin" )
Models
Code
RLS_fitM <- glm (formula = RLS ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" ,family = binomial)
RLS_fit_preF <- glm (formula = RLS ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 ,family = binomial)
RLS_fit_postF <- glm (formula = RLS ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 ,family = binomial)
pl <- c (
` (Intercept) ` = "Intercept" ,
Intervention_months_factor0 = "0-5 months" ,
Intervention_months_factor1 = "6-11 months" ,
Intervention_months_factor2 = "12-17 months" ,
Intervention_months_factor3 = "18-23 months" ,
Intervention_months_factor4 = "24+ months"
)
tab_model (RLS_fit_preF, RLS_fit_postF, RLS_fitM, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Restless Legs Syndrome" )
Restless Legs Syndrome
Pre-menopausal women
Post-menopausal women
Men
Predictors
Odds Ratios
CI
p
Odds Ratios
CI
p
Odds Ratios
CI
p
Intercept
0.00
0.00 – 0.00
<0.001
0.00
0.00 – 0.00
<0.001
0.00
0.00 – 0.00
<0.001
Age
1.02
1.00 – 1.04
0.087
1.02
0.99 – 1.05
0.192
1.01
1.00 – 1.02
0.095
Weight
1.00
0.98 – 1.01
0.780
1.00
0.99 – 1.02
0.606
1.01
1.00 – 1.02
0.174
Height
1.00
0.98 – 1.03
0.824
0.99
0.96 – 1.02
0.336
1.00
0.98 – 1.02
0.812
CalendarTime
1.48
1.35 – 1.67
<0.001
1.33
1.24 – 1.43
<0.001
1.44
1.33 – 1.58
<0.001
6-11 months
1.22
0.76 – 1.96
0.405
1.53
0.96 – 2.41
0.072
0.78
0.50 – 1.19
0.268
12-17 months
1.25
0.71 – 2.14
0.433
0.57
0.29 – 1.05
0.086
0.71
0.43 – 1.12
0.151
18-23 months
0.95
0.57 – 1.56
0.839
0.49
0.23 – 0.98
0.057
0.82
0.51 – 1.29
0.401
24+ months
0.97
0.62 – 1.52
0.903
0.97
0.58 – 1.62
0.922
0.67
0.45 – 0.98
0.042
Observations
2140
1303
3316
R2 Tjur
0.055
0.083
0.041
Model assumptions
Premenopausal females:
Code
plot (RLS_fit_preF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (RLS_fit_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) #residuals not above 3, no further attention needed
# A tibble: 3 × 14
.rownames RLS Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 1939 Yes -8.02 -8.19 -9.76 16
2 4905 Yes -11.0 16.8 -3.76 16
3 5828 Yes -23.0 -22.2 -0.760 16
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = RLS), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 )
# A tibble: 6 × 14
.rownames RLS Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 582 Yes -19.0 -14.2 0.240 16
2 1939 Yes -8.02 -8.19 -9.76 16
3 2679 Yes -19.0 6.81 -4.76 16
4 4905 Yes -11.0 16.8 -3.76 16
5 4982 Yes -11.0 -6.19 -6.76 16
6 5828 Yes -23.0 -22.2 -0.760 16
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (RLS_fit_preF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (RLS_fit_preF,col= "red" ,id.n= 3 )
StudRes Hat CookD
1939 3.1327912 0.001700358 0.0229408733
2679 3.2622789 0.001059712 0.0217764884
3945 -0.5404116 0.021673514 0.0003905762
4905 3.2239856 0.001278703 0.0230668823
5828 3.2644847 0.001113892 0.0229380085
7349 -0.5469749 0.023634589 0.0004383462
Code
car:: influenceIndexPlot (RLS_fit_preF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 4453 , - 3788 , - 7722 ), ]
RLS_fit_preF_upd1 <- update (RLS_fit_preF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (RLS_fit_preF,RLS_fit_preF_upd1) # no improvement
Calls:
1: glm(formula = RLS ~ Age + Weight + Height + CalendarTime +
Intervention_months_factor, family = binomial, data = data_scale, subset =
data_scale$Gender == "Female" & data_scale$PostMeno == 0)
2: glm(formula = RLS ~ Age + Weight + Height + CalendarTime +
Intervention_months_factor, family = binomial, data = data_scale2, subset =
data_scale2$Gender == "Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -11.12 -11.11
SE 1.27 1.27
Age 0.0185 0.0204
SE 0.0109 0.0109
Weight -0.00207 -0.00423
SE 0.00740 0.00754
Height 0.00304 0.00323
SE 0.01372 0.01382
CalendarTime 0.3929 0.3928
SE 0.0532 0.0532
Intervention_months_factor1 0.202 0.203
SE 0.243 0.243
Intervention_months_factor2 0.221 0.218
SE 0.282 0.282
Intervention_months_factor3 -0.0522 -0.0906
SE 0.2578 0.2606
Intervention_months_factor4 -0.0279 -0.0194
SE 0.2282 0.2283
Code
Call:
glm(formula = RLS ~ Age + Weight + Height + CalendarTime + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6613 -0.5266 -0.4640 -0.1131 3.2327
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -11.123138 1.269949 -8.759 < 2e-16 ***
Age 0.018547 0.010852 1.709 0.0874 .
Weight -0.002070 0.007403 -0.280 0.7798
Height 0.003043 0.013721 0.222 0.8245
CalendarTime 0.392904 0.053223 7.382 1.56e-13 ***
Intervention_months_factor1 0.202187 0.242825 0.833 0.4050
Intervention_months_factor2 0.220784 0.281836 0.783 0.4334
Intervention_months_factor3 -0.052247 0.257822 -0.203 0.8394
Intervention_months_factor4 -0.027921 0.228168 -0.122 0.9026
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1192.2 on 2139 degrees of freedom
Residual deviance: 1043.4 on 2131 degrees of freedom
(890 observations deleted due to missingness)
AIC: 1061.4
Number of Fisher Scoring iterations: 7
Code
summary (RLS_fit_preF_upd1)
Call:
glm(formula = RLS ~ Age + Weight + Height + CalendarTime + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6765 -0.5232 -0.4505 -0.1124 3.2451
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -11.113041 1.269958 -8.751 < 2e-16 ***
Age 0.020399 0.010888 1.873 0.061 .
Weight -0.004227 0.007537 -0.561 0.575
Height 0.003230 0.013816 0.234 0.815
CalendarTime 0.392812 0.053223 7.380 1.58e-13 ***
Intervention_months_factor1 0.202990 0.242892 0.836 0.403
Intervention_months_factor2 0.217917 0.281911 0.773 0.440
Intervention_months_factor3 -0.090577 0.260637 -0.348 0.728
Intervention_months_factor4 -0.019416 0.228330 -0.085 0.932
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1186.8 on 2136 degrees of freedom
Residual deviance: 1038.0 on 2128 degrees of freedom
(890 observations deleted due to missingness)
AIC: 1056
Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car:: vif (RLS_fit_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.055281 1 1.027269
Weight 1.193267 1 1.092368
Height 1.145829 1 1.070434
CalendarTime 1.037148 1 1.018405
Intervention_months_factor 1.045871 4 1.005622
Postmenopausal females:
Code
plot (RLS_fit_postF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (RLS_fit_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 14
.rownames RLS Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 1289 Yes 6.98 31.8 3.24 24
2 3919 Yes 5.98 9.81 9.24 24
3 4404 Yes 9.98 -16.2 -59.8 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = RLS), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 )
# A tibble: 0 × 14
# ℹ 14 variables: .rownames <chr>, RLS <fct>, Age <dbl[,1]>, Weight <dbl[,1]>,
# Height <dbl[,1]>, CalendarTime <dbl>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (RLS_fit_postF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (RLS_fit_postF,col= "red" ,id.n= 3 )
StudRes Hat CookD
1032 -0.8090639 0.055491784 0.002571956
1581 2.7936341 0.001944223 0.010046730
2275 2.7838772 0.002414639 0.012036717
3919 2.2815686 0.013554924 0.017764507
4404 1.6213702 0.132121029 0.042070312
Code
car:: influenceIndexPlot (RLS_fit_postF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 3919 , - 4404 ), ]
RLS_fit_postF_upd1 <- update (RLS_fit_postF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (RLS_fit_postF,RLS_fit_postF_upd1) # no improvement
Calls:
1: glm(formula = RLS ~ Age + Weight + Height + CalendarTime +
Intervention_months_factor, family = binomial, data = data_scale, subset =
data_scale$Gender == "Female" & data_scale$PostMeno == 1)
2: glm(formula = RLS ~ Age + Weight + Height + CalendarTime +
Intervention_months_factor, family = binomial, data = data_scale2, subset =
data_scale2$Gender == "Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -8.54 -11.12
SE 0.88 1.27
Age 0.0177 0.0185
SE 0.0136 0.0109
Weight 0.00420 -0.00207
SE 0.00814 0.00740
Height -0.01453 0.00304
SE 0.01510 0.01372
CalendarTime 0.2829 0.3929
SE 0.0371 0.0532
Intervention_months_factor1 0.422 0.202
SE 0.234 0.243
Intervention_months_factor2 -0.564 0.221
SE 0.328 0.282
Intervention_months_factor3 -0.7066 -0.0522
SE 0.3707 0.2578
Intervention_months_factor4 -0.0257 -0.0279
SE 0.2629 0.2282
Code
#multicollinearity
car:: vif (RLS_fit_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.027175 1 1.013497
Weight 1.153419 1 1.073973
Height 1.169699 1 1.081526
CalendarTime 1.063648 1 1.031333
Intervention_months_factor 1.084816 4 1.010228
Males:
Code
plot (RLS_fitM, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (RLS_fitM) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 14
.rownames RLS Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 440 Yes 13.0 -7.19 -3.76 16
2 2756 Yes -24.0 -8.19 -11.8 16
3 5646 Yes -16.0 -23.2 -6.76 16
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = RLS), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 0 influential data points
# A tibble: 9 × 14
.rownames RLS Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 276 Yes 17.0 19.8 14.2 16
2 440 Yes 13.0 -7.19 -3.76 16
3 2207 Yes 15.0 5.81 8.24 16
4 2756 Yes -24.0 -8.19 -11.8 16
5 4651 Yes 18.0 5.81 1.24 16
6 4927 Yes -1.02 18.8 15.2 16
7 5388 Yes -8.02 9.81 5.24 16
8 5646 Yes -16.0 -23.2 -6.76 16
9 6533 Yes 28.0 22.8 8.24 16
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (RLS_fitM)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (RLS_fitM,col= "red" ,id.n= 3 )
StudRes Hat CookD
440 3.2737342 0.0008415159 0.0182370669
1294 -0.4296158 0.0419803536 0.0004798190
2756 3.2627845 0.0009699569 0.0201121181
3057 -0.5736578 0.0248343972 0.0005113124
5646 3.2829199 0.0008086676 0.0180756607
Code
car:: influenceIndexPlot (RLS_fitM, id.n= 3 )
Code
#multicollinearity
car:: vif (RLS_fitM) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.090103 1 1.044080
Weight 1.241184 1 1.114084
Height 1.236493 1 1.111977
CalendarTime 1.027943 1 1.013875
Intervention_months_factor 1.038287 4 1.004708
Cognitive functioning
Scoring: https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/CFQ-form.pdf
Predict by ferritin
Code
res <- cor.test (data_scale$ CFQ_Total, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = CFQ_Total, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 75 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Cognitive functioning, higher is worse" , y= "Ferritin" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= CFQ_Total)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Cognitive functioning, higher is worse" )
Models
Code
CFQ_fitM <- lm (formula = CFQ_Total ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" )
CFQ_fit_preF <- lm (formula = CFQ_Total ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CFQ_fit_postF <- lm (formula = CFQ_Total ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
pl <- c (
` (Intercept) ` = "Intercept" ,
Intervention_months_factor0 = "0-5 months" ,
Intervention_months_factor1 = "6-11 months" ,
Intervention_months_factor2 = "12-17 months" ,
Intervention_months_factor3 = "18-23 months" ,
Intervention_months_factor4 = "24+ months"
)
tab_model (CFQ_fit_preF, CFQ_fit_postF, CFQ_fitM, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Cognitive functioning" )
Cognitive functioning
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
25.85
22.49 – 29.21
<0.001
25.42
21.99 – 28.85
<0.001
24.30
22.13 – 26.47
<0.001
Age
-0.25
-0.32 – -0.18
<0.001
-0.16
-0.25 – -0.08
<0.001
-0.14
-0.17 – -0.11
<0.001
Weight
0.01
-0.04 – 0.06
0.770
0.01
-0.04 – 0.07
0.584
-0.01
-0.04 – 0.02
0.577
Height
-0.08
-0.17 – 0.01
0.064
0.01
-0.08 – 0.11
0.801
-0.08
-0.14 – -0.02
0.007
CalendarTime
0.07
-0.09 – 0.23
0.366
0.14
-0.03 – 0.30
0.097
0.06
-0.04 – 0.17
0.252
6-11 months
-0.05
-1.59 – 1.49
0.951
0.42
-1.25 – 2.09
0.620
0.16
-0.97 – 1.30
0.776
12-17 months
-0.93
-2.45 – 0.59
0.230
1.09
-0.45 – 2.64
0.164
0.76
-0.31 – 1.83
0.163
18-23 months
-1.05
-3.12 – 1.02
0.319
1.40
-1.15 – 3.96
0.282
-0.95
-2.52 – 0.61
0.233
24+ months
1.20
-0.62 – 3.02
0.197
0.31
-1.87 – 2.48
0.783
-0.14
-1.40 – 1.11
0.821
Observations
2342
1464
3534
R2 / R2 adjusted
0.025 / 0.022
0.015 / 0.010
0.036 / 0.034
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity ok, constant variance meh (heteroscedasticity)
plot (CFQ_fit_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (CFQ_fit_preF, which = 3 ) # Linearity good, don't have constant variance
Code
# formal test in the form of Breusch-Pagan
bptest (CFQ_fit_preF) # small p-value, we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: CFQ_fit_preF
BP = 39.827, df = 8, p-value = 3.451e-06
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CFQ_fit_preF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CFQ_fit_preF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CFQ_fit_preF)
W = 0.98255, p-value = 2.352e-16
Code
### OUTLIERS
#plot 3largest
plot (CFQ_fit_preF, which = 4 , id.n = 10 )
Code
# inspect the largest
model.data <- augment (CFQ_fit_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames CFQ_Total Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 275 60 -6.02 49.8 5.24 16
2 4065 73 -17.0 35.8 -10.8 24
3 7774 82 -22.0 -20.2 -6.76 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CFQ_Total), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 19 × 14
.rownames CFQ_Total Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 671 78 -18.0 -17.2 -12.8 24
2 1146 76 -17.0 -23.2 -4.76 24
3 1366 87 -15.0 -11.2 -9.76 24
4 1414 77 -23.0 -8.19 0.240 16
5 1776 70 -16.0 8.81 3.24 16
6 3168 72 -14.0 19.8 -0.760 24
7 3514 86 -17.0 -15.2 -11.8 24
8 3611 92 -18.0 -13.2 -8.76 24
9 3662 83 -17.0 -23.2 -15.8 24
10 3812 82 -17.0 14.8 -8.76 24
11 4065 73 -17.0 35.8 -10.8 24
12 4595 78 -16.0 6.81 1.24 16
13 5244 92 -21.0 -20.2 -8.76 16
14 5572 72 -1.02 -3.19 -1.76 24
15 6447 75 -14.0 -8.19 6.24 24
16 6780 74 -18.0 6.81 -10.8 24
17 6859 72 -16.0 -18.2 -8.76 24
18 7737 73 -19.0 -6.19 -2.76 24
19 7774 82 -22.0 -20.2 -6.76 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Cook's distance
cooksd = cooks.distance (CFQ_fit_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# (log) transform to pass normality.
data_scale2 <- data_scale
data_scale2$ CFQ_Total <- log (data_scale2$ CFQ_Total)
table (data_scale2$ CFQ_Total)
-Inf 0 0.693147180559945 1.09861228866811
29 20 29 34
1.38629436111989 1.6094379124341 1.79175946922805 1.94591014905531
31 66 53 73
2.07944154167984 2.19722457733622 2.30258509299405 2.39789527279837
86 91 108 121
2.484906649788 2.56494935746154 2.63905732961526 2.70805020110221
147 143 150 180
2.77258872223978 2.83321334405622 2.89037175789616 2.94443897916644
193 199 229 246
2.99573227355399 3.04452243772342 3.09104245335832 3.13549421592915
230 266 225 290
3.17805383034795 3.2188758248682 3.25809653802148 3.29583686600433
262 293 273 269
3.3322045101752 3.36729582998647 3.40119738166216 3.43398720448515
265 279 252 236
3.46573590279973 3.49650756146648 3.52636052461616 3.55534806148941
190 218 205 175
3.58351893845611 3.61091791264422 3.63758615972639 3.66356164612965
176 162 130 111
3.68887945411394 3.71357206670431 3.73766961828337 3.76120011569356
130 113 84 105
3.78418963391826 3.80666248977032 3.8286413964891 3.85014760171006
60 76 62 59
3.87120101090789 3.89182029811063 3.91202300542815 3.93182563272433
50 44 45 35
3.95124371858143 3.97029191355212 3.98898404656427 4.00733318523247
25 36 21 18
4.02535169073515 4.04305126783455 4.06044301054642 4.07753744390572
19 17 19 18
4.0943445622221 4.11087386417331 4.12713438504509 4.14313472639153
15 17 8 9
4.15888308335967 4.17438726989564 4.18965474202643 4.20469261939097
5 3 9 7
4.21950770517611 4.23410650459726 4.24849524204936 4.26267987704132
5 2 3 4
4.27666611901606 4.29045944114839 4.30406509320417 4.31748811353631
4 2 1 1
4.33073334028633 4.34380542185368 4.35670882668959 4.36944785246702
1 2 2 1
4.39444915467244 4.40671924726425 4.4188406077966 4.45434729625351
1 2 1 1
4.46590811865458 4.52178857704904
1 2
Code
data_scale2[data_scale2 <= - Inf ] <- NA # Replace -Inf with NA
table (data_scale2$ CFQ_Total)
0 0.693147180559945 1.09861228866811 1.38629436111989
20 29 34 31
1.6094379124341 1.79175946922805 1.94591014905531 2.07944154167984
66 53 73 86
2.19722457733622 2.30258509299405 2.39789527279837 2.484906649788
91 108 121 147
2.56494935746154 2.63905732961526 2.70805020110221 2.77258872223978
143 150 180 193
2.83321334405622 2.89037175789616 2.94443897916644 2.99573227355399
199 229 246 230
3.04452243772342 3.09104245335832 3.13549421592915 3.17805383034795
266 225 290 262
3.2188758248682 3.25809653802148 3.29583686600433 3.3322045101752
293 273 269 265
3.36729582998647 3.40119738166216 3.43398720448515 3.46573590279973
279 252 236 190
3.49650756146648 3.52636052461616 3.55534806148941 3.58351893845611
218 205 175 176
3.61091791264422 3.63758615972639 3.66356164612965 3.68887945411394
162 130 111 130
3.71357206670431 3.73766961828337 3.76120011569356 3.78418963391826
113 84 105 60
3.80666248977032 3.8286413964891 3.85014760171006 3.87120101090789
76 62 59 50
3.89182029811063 3.91202300542815 3.93182563272433 3.95124371858143
44 45 35 25
3.97029191355212 3.98898404656427 4.00733318523247 4.02535169073515
36 21 18 19
4.04305126783455 4.06044301054642 4.07753744390572 4.0943445622221
17 19 18 15
4.11087386417331 4.12713438504509 4.14313472639153 4.15888308335967
17 8 9 5
4.17438726989564 4.18965474202643 4.20469261939097 4.21950770517611
3 9 7 5
4.23410650459726 4.24849524204936 4.26267987704132 4.27666611901606
2 3 4 4
4.29045944114839 4.30406509320417 4.31748811353631 4.33073334028633
2 1 1 1
4.34380542185368 4.35670882668959 4.36944785246702 4.39444915467244
2 2 1 1
4.40671924726425 4.4188406077966 4.45434729625351 4.46590811865458
2 1 1 1
4.52178857704904
2
Code
CFQFfactor_fix <- lm (CFQ_Total~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
qqnorm (resid (CFQFfactor_fix), col = "grey" )
qqline (resid (CFQFfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CFQFfactor_fix))
Shapiro-Wilk normality test
data: resid(CFQFfactor_fix)
W = 0.9088, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see below.
#multicollinearity
car:: vif (CFQ_fit_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.071236 1 1.035005
Weight 1.202096 1 1.096401
Height 1.135365 1 1.065535
CalendarTime 1.424756 1 1.193631
Intervention_months_factor 1.422811 4 1.045065
Postmenopausal females:
Code
#### FEMALE post
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity good, constant variance looks good (heteroscedasticity)
plot (CFQ_fit_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CFQ_fit_postF, which = 3 ) # Linearity good, constant variance
Code
# formal test in the form of Breusch-Pagan
bptest (CFQ_fit_postF) # we fail to reject the null of homoscedasticity. The constant variance assumption is not violated.
studentized Breusch-Pagan test
data: CFQ_fit_postF
BP = 14.037, df = 8, p-value = 0.08081
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CFQ_fit_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CFQ_fit_postF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CFQ_fit_postF)
W = 0.99077, p-value = 5.719e-08
Code
### OUTLIERS
#plot 3largest
plot (CFQ_fit_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CFQ_fit_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames CFQ_Total Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 3296 59 11.0 19.8 14.2 24
2 3940 0 6.98 -19.2 -22.8 24
3 4433 79 3.98 -8.19 -12.8 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CFQ_Total), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 5 × 14
.rownames CFQ_Total Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 695 72 15.0 11.8 -6.76 24
2 1869 61 11.0 7.81 -7.76 16
3 4433 79 3.98 -8.19 -12.8 24
4 4695 60 16.0 0.811 -4.76 16
5 5404 58 21.0 3.81 -4.76 16
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Cook's distance
cooksd = cooks.distance (CFQ_fit_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
CFQ_postF_factor_fix <- lm (CFQ_Total~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (CFQ_postF_factor_fix), col = "grey" )
qqline (resid (CFQ_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CFQ_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(CFQ_postF_factor_fix)
W = 0.93056, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CFQ_fit_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.035637 1 1.017663
Weight 1.153352 1 1.073942
Height 1.177418 1 1.085089
CalendarTime 1.317602 1 1.147869
Intervention_months_factor 1.333149 4 1.036597
Males:
Code
### MALE
### LINEARITY (Lnearity and constant variance assumptions)
#Fitted versus Residuals Plot: Good
plot (CFQ_fitM, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CFQ_fitM, which = 3 ) # weird pattern below
Code
# formal test in the form of Breusch-Pagan
bptest (CFQ_fitM)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: CFQ_fitM
BP = 64.993, df = 8, p-value = 4.842e-11
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CFQ_fitM, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CFQ_fitM)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CFQ_fitM)
W = 0.98861, p-value = 2.914e-16
Code
### OUTLIERS
#plot 3largest
plot (CFQ_fitM, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CFQ_fitM) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames CFQ_Total Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 897 66 -9.02 66.8 23.2 16
2 2505 61 11.0 -3.19 -2.76 16
3 2736 65 -18.0 56.8 10.2 16
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CFQ_Total), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 23 × 14
.rownames CFQ_Total Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 309 59 5.98 11.8 1.24 16
2 897 66 -9.02 66.8 23.2 16
3 1214 61 15.0 13.8 5.24 16
4 1266 59 6.98 1.81 12.2 24
5 1730 81 4.98 -14.2 -3.76 16
6 2505 61 11.0 -3.19 -2.76 16
7 2736 65 -18.0 56.8 10.2 16
8 2756 67 -24.0 -8.19 -11.8 16
9 2879 68 -18.0 1.81 8.24 24
10 3045 61 -18.0 11.8 18.2 24
# ℹ 13 more rows
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Cook's distance
cooksd = cooks.distance (CFQ_fitM)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
CFQMfactor_fix <- lm (CFQ_Total~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Male" )
qqnorm (resid (CFQMfactor_fix), col = "grey" )
qqline (resid (CFQMfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CFQMfactor_fix))
Shapiro-Wilk normality test
data: resid(CFQMfactor_fix)
W = 0.89562, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CFQ_fitM) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.091051 1 1.044534
Weight 1.270207 1 1.127035
Height 1.250482 1 1.118249
CalendarTime 1.284652 1 1.133425
Intervention_months_factor 1.293101 4 1.032652
We conclude that the assumptions were violated and thus we use robust models.
Robust models
Code
CFQ_preF_robust <- lmrob (CFQ_Total ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CFQ_postF_robust <- lmrob (CFQ_Total ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
CFQ_M_robust <- lmrob (CFQ_Total ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (CFQ_preF_robust, CFQ_postF_robust, CFQ_M_robust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Cognitive functioning, higher means more cognitive failure" , digits = 3 )
Cognitive functioning, higher means more cognitive failure
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
27.197
25.990 – 28.404
<0.001
27.331
25.745 – 28.918
<0.001
25.049
24.365 – 25.732
<0.001
Age
-0.210
-0.280 – -0.141
<0.001
-0.148
-0.240 – -0.056
0.002
-0.126
-0.153 – -0.098
<0.001
Weight
-0.005
-0.053 – 0.042
0.826
0.000
-0.053 – 0.053
0.997
-0.021
-0.059 – 0.016
0.269
Height
-0.073
-0.161 – 0.015
0.102
0.014
-0.087 – 0.114
0.791
-0.069
-0.129 – -0.008
0.026
6-12 months
0.144
-1.354 – 1.642
0.851
0.873
-0.767 – 2.512
0.297
0.047
-1.037 – 1.130
0.933
12-18 months
-1.458
-2.942 – 0.025
0.054
1.084
-0.502 – 2.670
0.180
0.807
-0.285 – 1.898
0.148
18-24 months
-0.924
-2.617 – 0.769
0.285
2.588
-0.055 – 5.231
0.055
-0.591
-2.138 – 0.955
0.454
24+ months
0.866
-0.823 – 2.555
0.315
0.859
-1.123 – 2.842
0.395
-0.238
-1.375 – 0.900
0.682
Observations
2342
1464
3534
R2 / R2 adjusted
0.022 / 0.019
0.012 / 0.007
0.033 / 0.031
Physical health
Scored by https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/SF-36-RAND-36-form.pdf
Predict by ferritin
Code
res <- cor.test (data_scale$ SF_ph, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = SF_ph, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 75 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Physical health (SF-36), higher is better" , y= "Ferritin" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= SF_ph)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Physical health (SF-36), higher is better" )
Models
Code
SF_ph_preF <- lm (SF_ph ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_ph_postF <- lm (SF_ph ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_ph_M <- lm (SF_ph ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_ph_preF, SF_ph_postF, SF_ph_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Physical health (Sf-36), higher score indicates better health" , show.reflvl = T, digits= 3 )
Physical health (Sf-36), higher score indicates better health
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
91.648
89.176 – 94.119
<0.001
85.089
81.728 – 88.451
<0.001
92.265
90.630 – 93.900
<0.001
Age
0.081
0.027 – 0.134
0.003
-0.026
-0.110 – 0.059
0.555
-0.031
-0.050 – -0.012
0.002
Weight
-0.154
-0.190 – -0.118
<0.001
-0.191
-0.243 – -0.138
<0.001
-0.113
-0.138 – -0.088
<0.001
Height
0.145
0.079 – 0.211
<0.001
0.049
-0.046 – 0.144
0.315
0.117
0.073 – 0.161
<0.001
CalendarTime
-0.057
-0.174 – 0.060
0.339
0.171
0.010 – 0.332
0.037
-0.053
-0.134 – 0.027
0.193
6-12 months
-0.130
-1.266 – 1.005
0.822
0.103
-1.527 – 1.734
0.901
0.177
-0.674 – 1.029
0.683
12-18 months
0.297
-0.817 – 1.411
0.601
-1.199
-2.711 – 0.313
0.120
-0.532
-1.339 – 0.274
0.196
18-24 months
-0.191
-1.710 – 1.328
0.805
-2.783
-5.276 – -0.289
0.029
0.159
-1.023 – 1.341
0.792
24+ months
-0.210
-1.552 – 1.132
0.759
0.088
-2.045 – 2.222
0.935
0.238
-0.706 – 1.181
0.621
Observations
2398
1468
3582
R2 / R2 adjusted
0.031 / 0.028
0.042 / 0.037
0.032 / 0.030
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance meh (heteroscedasticity)
plot (SF_ph_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (SF_ph_preF, which = 3 ) # Linearity bad, constant variance meh
Code
# formal test in the form of Breusch-Pagan
bptest (SF_ph_preF) # small p-value, we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_ph_preF
BP = 26.092, df = 8, p-value = 0.001013
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_ph_preF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_ph_preF)) #reject the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_ph_preF)
W = 0.79787, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_ph_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_ph_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames SF_ph Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 3703 31.9 -2.02 24.8 8.24 24
2 4034 21.9 -22.0 -7.19 -14.8 24
3 4065 13.8 -17.0 35.8 -10.8 24
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_ph), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 20 × 14
.rownames SF_ph Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 936 43.1 -8.02 9.81 -16.8 16
2 1407 23.8 -15.0 -17.2 -8.76 16
3 1659 28.1 -12.0 26.8 1.24 16
4 2032 34.4 -14.0 -11.2 -7.76 16
5 2111 41.2 -21.0 -5.19 3.24 16
6 2271 48.7 -21.0 -13.2 -7.76 16
7 2998 50.6 -22.0 -26.2 -11.8 24
8 3703 31.9 -2.02 24.8 8.24 24
9 3877 50.0 -19.0 -0.189 9.24 24
10 4034 21.9 -22.0 -7.19 -14.8 24
11 4065 13.8 -17.0 35.8 -10.8 24
12 4595 42.4 -16.0 6.81 1.24 16
13 5649 38.7 -1.02 -4.19 -18.8 16
14 6172 32.5 -21.0 4.81 -16.8 24
15 6286 52.5 -5.02 -10.2 -0.760 24
16 6414 45.0 -12.0 -20.2 -9.76 24
17 6859 47.5 -16.0 -18.2 -8.76 24
18 7026 42.6 -6.02 -13.2 -8.76 24
19 7246 44.4 -22.0 -13.2 -15.8 24
20 7301 43.2 -19.0 -18.2 -2.76 24
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance (SF_ph_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
data_scale2 <- data_scale
table (data_scale2$ SF_ph)
13.8010204081633 15.0510204081633 15.1020408163265 16.25
1 1 1 1
21.9132653061224 23.1122448979592 23.8010204081633 23.8520408163265
1 1 1 1
25.6122448979592 26.8622448979592 27.4744897959184 28.1122448979592
1 1 1 1
29.3622448979592 30.6122448979592 31.1734693877551 31.9132653061224
1 1 1 2
32.4744897959184 32.6020408163265 33.1632653061225 33.8010204081633
1 1 1 1
34.4132653061224 35.6122448979592 35.6632653061224 37.4234693877551
1 1 1 1
38.7244897959184 39.3622448979592 39.9744897959184 40.0255102040816
3 2 1 1
40.0510204081633 40.6122448979592 40.6632653061224 41.1734693877551
1 2 1 1
41.2244897959184 42.4234693877551 42.4744897959184 42.6020408163265
4 1 2 1
43.0867346938775 43.1632653061224 43.7244897959184 44.3367346938775
1 1 4 1
44.4132653061224 44.9744897959184 45.1020408163265 45.6122448979592
2 3 1 1
46.2244897959184 46.2755102040816 46.8367346938775 47.4744897959184
1 1 1 3
48.0357142857143 48.0867346938775 48.6734693877551 48.7244897959184
2 2 1 4
48.7755102040816 49.4132653061224 49.9744897959184 50.5357142857143
2 2 2 2
50.5867346938775 50.6122448979592 51.1734693877551 51.2244897959184
3 1 1 2
51.2755102040816 51.7857142857143 51.8367346938775 52.4744897959184
2 1 3 6
52.5255102040816 52.984693877551 53.0357142857143 53.0867346938775
1 1 3 5
53.1632653061224 53.7244897959184 53.7755102040816 54.3367346938775
1 3 1 4
54.3622448979592 54.3877551020408 54.4642857142857 54.9744897959184
2 1 1 1
55.0255102040816 55.5357142857143 55.5867346938775 55.6122448979592
2 2 6 1
55.6377551020408 55.6632653061224 55.765306122449 56.2244897959184
1 2 1 3
56.2755102040816 56.3010204081633 56.7857142857143 56.8367346938775
2 1 3 2
56.8877551020408 56.9132653061224 57.4234693877551 57.4744897959184
1 2 1 5
58.0357142857143 58.0867346938775 58.1122448979592 58.1377551020408
5 6 1 4
58.6989795918367 58.7244897959184 58.75 59.234693877551
1 4 2 1
59.2857142857143 59.3367346938775 59.3622448979592 59.3877551020408
3 4 1 1
59.8979591836735 59.9744897959184 60.5357142857143 60.5867346938775
2 4 2 5
60.6122448979592 60.6377551020408 61.0969387755102 61.1479591836735
1 4 2 1
61.2244897959184 61.25 61.7857142857143 61.8367346938775
3 1 2 8
61.8622448979592 61.8877551020408 62.4489795918367 62.4744897959184
1 3 1 1
63.0357142857143 63.0867346938775 63.1377551020408 63.1887755102041
6 7 3 1
63.2142857142857 63.6479591836735 63.6734693877551 63.6989795918367
1 1 1 2
63.7244897959184 63.75 64.234693877551 64.2857142857143
2 2 1 6
64.3367346938775 64.3877551020408 64.4132653061224 64.8469387755102
3 2 1 1
64.8979591836735 64.9744897959184 65 65.0255102040816
4 3 1 1
65.5357142857143 65.5867346938775 65.6377551020408 66.1479591836735
2 3 1 3
66.1989795918367 66.2244897959184 66.25 66.2755102040816
1 3 2 1
66.734693877551 66.7857142857143 66.8367346938775 66.8622448979592
1 4 8 1
66.8877551020408 67.3979591836735 67.4489795918367 67.4744897959184
2 5 2 3
67.5 68.0357142857143 68.0867346938775 68.1122448979592
2 1 9 1
68.1377551020408 68.5969387755102 68.6479591836735 68.6989795918367
2 4 1 7
68.7244897959184 68.75 69.234693877551 69.3367346938775
1 4 1 7
69.3877551020408 69.4387755102041 69.9489795918367 69.9744897959184
2 1 1 5
70.4336734693878 70.5357142857143 70.5867346938775 70.6377551020408
1 2 10 5
70.6632653061224 71.0969387755102 71.2244897959184 71.25
1 1 1 2
71.2755102040816 71.7857142857143 71.8367346938775 71.9132653061224
1 3 10 1
72.3979591836735 72.4489795918367 72.4744897959184 72.5
2 1 2 3
72.6275510204082 72.984693877551 73.0357142857143 73.0867346938775
1 1 10 20
73.1377551020408 73.1887755102041 73.2142857142857 73.5969387755102
7 1 1 1
73.6479591836735 73.6734693877551 73.6989795918367 73.7244897959184
6 1 3 2
73.75 73.7755102040816 73.8265306122449 74.234693877551
7 2 1 1
74.2857142857143 74.3367346938775 74.3877551020408 74.8469387755102
8 17 6 1
74.8979591836735 74.9489795918367 74.9744897959184 75
2 1 2 6
75.0255102040816 75.484693877551 75.5357142857143 75.5612244897959
1 1 10 1
75.5867346938775 75.6377551020408 75.6887755102041 75.9948979591837
18 12 1 1
76.0969387755102 76.1479591836735 76.1989795918367 76.2244897959184
1 2 7 3
76.25 76.2755102040816 76.6836734693878 76.7857142857143
8 2 1 5
76.8367346938775 76.8622448979592 76.8877551020408 76.9387755102041
17 1 13 2
76.9642857142857 77.2959183673469 77.3469387755102 77.3979591836735
1 1 2 7
77.4489795918367 77.4744897959184 77.5 78.0357142857143
3 1 9 10
78.0867346938775 78.1377551020408 78.5969387755102 78.6479591836735
33 15 1 6
78.6734693877551 78.6989795918367 78.7244897959184 78.75
1 9 3 15
78.7755102040816 79.2857142857143 79.3367346938775 79.3877551020408
1 14 31 13
79.8469387755102 79.8979591836735 79.9489795918367 79.9744897959184
1 5 10 1
80 80.0255102040816 80.0765306122449 80.484693877551
9 1 1 1
80.5357142857143 80.5867346938775 80.6377551020408 80.6632653061224
20 56 20 1
80.6887755102041 81.0969387755102 81.1479591836735 81.1989795918367
3 1 9 14
81.2244897959184 81.25 81.3265306122449 81.734693877551
2 16 1 1
81.7857142857143 81.8367346938775 81.8877551020408 81.9387755102041
17 53 24 2
82.3469387755102 82.3979591836735 82.4489795918367 82.5
1 15 20 17
82.5255102040816 82.984693877551 83.0357142857143 83.0867346938775
1 1 11 53
83.1377551020408 83.1887755102041 83.5969387755102 83.6479591836735
28 3 3 23
83.6989795918367 83.75 83.7755102040816 83.8265306122449
19 27 1 1
83.8775510204082 84.2857142857143 84.3367346938775 84.3877551020408
1 6 68 45
84.4387755102041 84.8469387755102 84.8979591836735 84.9489795918367
6 4 42 39
85 85.0255102040816 85.484693877551 85.5357142857143
36 1 1 5
85.5867346938775 85.6377551020408 85.6887755102041 86.0969387755102
60 47 1 5
86.1479591836735 86.1989795918367 86.2244897959184 86.25
40 64 1 55
86.734693877551 86.7857142857143 86.8367346938775 86.8877551020408
1 7 39 56
86.9387755102041 87.3469387755102 87.3979591836735 87.4489795918367
6 3 42 103
87.5 87.5255102040816 88.0357142857143 88.0867346938775
89 1 3 27
88.1377551020408 88.1887755102041 88.5969387755102 88.6479591836735
54 3 1 45
88.6989795918367 88.75 89.2857142857143 89.3367346938775
130 109 2 23
89.3877551020408 89.4387755102041 89.8469387755102 89.8979591836735
50 4 8 45
89.9489795918367 90 90.5867346938775 90.6377551020408
169 207 19 41
90.6887755102041 91.0969387755102 91.1479591836735 91.1989795918367
7 2 43 203
91.25 91.3775510204082 91.8367346938775 91.8877551020408
268 1 9 36
91.9387755102041 92.3469387755102 92.3979591836735 92.4489795918367
3 1 25 192
92.5 93.1377551020408 93.1887755102041 93.6479591836735
391 29 3 16
93.6989795918367 93.75 94.3877551020408 94.4387755102041
189 450 6 4
94.8979591836735 94.9489795918367 95 95.6887755102041
13 157 599 3
96.1989795918367 96.25 97.4489795918367 97.5
114 551 48 576
98.75 100
518 429
Code
data_scale2$ SF_ph <- log10 (data_scale$ SF_ph)
table (data_scale2$ SF_ph)
1.13991119808611 1.17756594462169 1.17903563970246 1.21085336531489
1 1 1 1
1.34070709681079 1.36384213065636 1.37659557672604 1.37752554385206
1 1 1 1
1.40844764578854 1.42914230416503 1.43892963627752 1.44889552749531
1 1 1 1
1.46778925660933 1.48589517902717 1.49378513888608 1.50397124267296
1 1 1 2
1.5115423366332 1.51324478680192 1.52065728528638 1.52892981125237
1 1 1 1
1.53672588265145 1.55159935126669 1.55222110438921 1.57314404682283
1 1 1 1
1.587985704539 1.59507985904269 1.60178292944813 1.60233687656648
3 2 1 1
1.60261358538878 1.60865699638119 1.60920225003964 1.61461746336559
1 2 1 1
1.61515528941811 1.62760618219906 1.62812817082188 1.62943040412713
4 1 2 1
1.63434358255055 1.63511429168255 1.64072475056672 1.64676370509219
1 1 4 1
1.64751270409687 1.65296624527886 1.6541961936566 1.65908144743944
2 3 1 1
1.66487212632034 1.66535121570362 1.67058660984477 1.67646030611031
1 1 1 3
1.68156425299621 1.68202528752135 1.68729230334762 1.68774730022727
2 2 1 4
1.68820182091962 1.69384355369865 1.69874836897428 1.70359840851809
2 2 2 2
1.70403664718485 1.7042556007977 1.70904486166394 1.70947764145252
3 1 1 2
1.70990999040003 1.71420997089276 1.71463763659142 1.71994822467427
2 1 3 6
1.72037027959757 1.72415042951464 1.72456842231101 1.72498601319117
1 1 3 5
1.72561164760703 1.73017229982901 1.73058453952005 1.73509353641828
1 3 1 4
1.73529738269374 1.73550113333408 1.73611181234059 1.74016120747629
2 1 1 1
1.74056407808209 1.74457236202064 1.7449711632258 1.74517042658415
2 2 6 1
1.74536959855824 1.74556867923187 1.74636409059323 1.74992552315929
1 2 1 3
1.75031944108371 1.7505162661412 1.75423909297823 1.75462911948123
2 1 3 2
1.7550187960277 1.75521350326338 1.75908942798006 1.75947512470337
1 2 1 5
1.76369533397267 1.76407696359469 1.76426765272262 1.76445825815992
5 6 1 4
1.76863055164819 1.76881925227332 1.76900787094377 1.7725761483821
1 4 2 1
1.77295005669784 1.77332364337197 1.77351031626627 1.77369690895739
3 4 1 1
1.77741202555512 1.77796656210448 1.78201167119688 1.78237754694043
2 4 2 5
1.7825603692887 1.78274311470772 1.78601945073012 1.7863819670132
1 4 2 1
1.78692517469115 1.78710609303657 1.79088807178658 1.79124654847379
3 1 2 8
1.79142567591783 1.7916047295101 1.79552534645307 1.79570271810426
1 3 1 1
1.79958667838162 1.79993804934084 1.80028913624913 1.80063993956538
6 7 3 1
1.80081523501959 1.80378448293895 1.80395851398993 1.80413247533089
1 1 1 2
1.80430636701766 1.80448018910599 1.80776965875139 1.80811447376109
2 2 1 6
1.80845901521661 1.80880328355164 1.80897531543422 1.81188947919753
3 2 1 1
1.81223103995592 1.81274287794316 1.81291335664286 1.81308376844881
4 3 1 1
1.81647803724589 1.8168160096224 1.81715371918989 1.82051644974889
2 3 1 3
1.82085129516402 1.82101862110787 1.82118588260885 1.82135307971655
1 3 2 1
1.82435167263177 1.82468357519428 1.82501522429929 1.82518095392614
1 4 8 1
1.82534662033361 1.82864674625805 1.82897538379315 1.82913960935075
2 5 2 3
1.82930377283102 1.83273694866942 1.83306250676705 1.83322519434412
2 1 9 1
1.83338782100092 1.83630473520284 1.8366276307433 1.83695028639105
2 4 1 7
1.83711152436651 1.8372727025023 1.84032377630326 1.84096338537602
1 4 1 7
1.84128283701374 1.84160205384686 1.84478138343304 1.84493974058407
2 1 1 5
1.84778033961881 1.84840906862026 1.84872309212049 1.84903688872512
1 2 10 5
1.84919370204399 1.85185090169285 1.85262934693067 1.85278486868055
1 1 1 2
1.85294033475771 1.85603802607827 1.85634658344962 1.85680900885115
1 3 10 1
1.859726324101 1.86003227302658 1.86018516670248 1.86033800657099
2 1 2 3
1.8611014001265 1.86323179078481 1.86353528100114 1.86383855928295
1 1 10 20
1.86414162592603 1.86444448122554 1.86459582971354 1.86685975047129
7 1 1 1
1.86716071686026 1.86731112187714 1.86746147482374 1.86761177573609
6 1 3 2
1.8677620246502 1.86791222160204 1.86821245976256 1.87060692196545
7 2 1 1
1.87090530362054 1.87120348041351 1.87150145262548 1.87417404248681
8 17 6 1
1.87446998422358 1.87476572443378 1.87491351905216 1.8750612633917
2 1 2 6
1.87520895748661 1.87785889814018 1.87815234036884 1.87829898716473
1 1 10 1
1.87844558445959 1.87873863067982 1.87903147929638 1.88078443619459
18 12 1 1
1.88136718634161 1.88165826844493 1.88194915558367 1.8820945261229
1 2 7 3
1.88223984801882 1.88238512130397 1.88470290923043 1.88528042857339
8 2 1 5
1.88556890050821 1.8857130646529 1.88585718095816 1.88614527017728
17 1 13 2
1.88628924315453 1.88815656148185 1.88844312993956 1.88872950943025
1 1 2 7
1.88901570020299 1.88915872489781 1.88930170250631 1.89229340996422
3 1 9 10
1.89257726257688 1.89286092978612 1.89540563129648 1.89568745770605
33 15 1 6
1.89582830235846 1.8959691013488 1.89610985470667 1.89625056246164
1 9 3 15
1.89639122464324 1.89919494310842 1.89947432200638 1.89975352129719
1 14 31 13
1.90225827052599 1.90253568636545 1.90281292511211 1.90295147814628
1 5 10 1
1.90308998699194 1.90322845167729 1.90350524867959 1.9057132965597
9 1 1 1
1.90598851487176 1.90626355888469 1.90653842881912 1.90667579857573
20 56 20 1
1.90681312489527 1.90900446089432 1.90927760208691 1.90955057160055
3 1 9 14
1.90968699204517 1.90982336965091 1.91023224570362 1.91240644039174
2 16 1 1
1.91267745099767 1.91294829259167 1.91321896538441 1.91348946958619
17 53 24 2
1.91564745902958 1.91591645531065 1.91618528508209 1.91645394854993
1 15 20 17
1.91658821798426 1.9189979962614 1.91926492588375 1.91953169154442
1 1 11 53
1.91979829344469 1.9200647317855 1.92219037436192 1.92245534964891
28 3 3 23
1.92272016336559 1.92298481570888 1.92311708142695 1.92338149207859
19 27 1 1
1.92364574184756 1.92575397162789 1.92601678221497 1.92627943386005
1 6 68 45
1.92654192675526 1.92863617786304 1.92889725059823 1.92915816648586
6 4 42 39
1.92941892571429 1.92954924664007 1.93188836081481 1.93214748640836
36 1 1 5
1.93240645748455 1.93266527422756 1.93292393682121 1.93498771014659
60 47 1 5
1.93524499361495 1.93550212475444 1.9356306332572 1.93575910374531
40 64 1 55
1.9381928500218 1.93844824225609 1.93870348439209 1.93895857660613
1 7 39 56
1.93921351907421 1.94124768898466 1.94150129160903 1.9417547462307
6 3 42 103
1.94200805302231 1.94213465103572 1.94465889227103 1.944910511329
89 1 3 27
1.94516198468976 1.94541331252195 1.94741871629031 1.94766874190568
54 3 1 45
1.9479186236628 1.94816836172713 1.95078197732982 1.95103007472697
130 109 2 23
1.95127803047559 1.95152584473732 1.9535032846108 1.95374983271955
50 4 8 45
1.95399624094285 1.95424250943932 1.95706460528116 1.95730914046887
169 207 19 41
1.95755353804533 1.95950378317232 1.95974694918198 1.95998997911664
7 2 43 203
1.96023287312851 1.96083951449256 1.96301643374683 1.96325764146306
268 1 9 36
1.96349871528657 1.96542250351271 1.96566237895758 1.96590212198432
3 1 25 192
1.96614173273903 1.96912576592927 1.96936360519146 1.97149831748353
391 29 3 16
1.97173486132484 1.97197127639976 1.97491565704654 1.97515034739643
189 450 6 4
1.97725687286144 1.97749030177429 1.97772360528885 1.98086099713027
13 157 599 3
1.98317046538516 1.98340073818054 1.98877729589125 1.98900461569854
114 551 48 576
1.9945371042985 2
518 429
Code
SF_ph_preF_factor_fix <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
qqnorm (resid (SF_ph_preF_factor_fix), col = "grey" )
qqline (resid (SF_ph_preF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_ph_preF_factor_fix))
Shapiro-Wilk normality test
data: resid(SF_ph_preF_factor_fix)
W = 0.67074, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_ph_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.070805 1 1.034797
Weight 1.206691 1 1.098495
Height 1.141525 1 1.068422
CalendarTime 1.417887 1 1.190751
as.factor(Intervention_months_factor) 1.416789 4 1.044511
Postmenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance looks bad (heteroscedasticity)
plot (SF_ph_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_ph_postF, which = 3 ) # Linearity bad, constant variance bad
Code
# formal test in the form of Breusch-Pagan
bptest (SF_ph_postF) # reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_ph_postF
BP = 27.869, df = 8, p-value = 0.0004997
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_ph_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_ph_postF)) #reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_ph_postF)
W = 0.80674, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_ph_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_ph_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames SF_ph Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 4246 16.2 14.0 11.8 7.24 24
2 4440 29.4 4.98 8.81 4.24 24
3 6837 32.6 6.98 -14.2 -13.8 24
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_ph), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 14 × 14
.rownames SF_ph Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 289 33.2 2.98 12.8 1.24 16
2 1423 40.7 4.98 -13.2 -6.76 16
3 2400 41.2 23.0 -8.19 -0.760 16
4 2408 27.5 17.0 6.81 -1.76 16
5 2658 38.7 28.0 -9.19 -19.8 16
6 3087 38.7 12.0 -7.19 -6.76 24
7 3814 23.9 6.98 9.81 -2.76 24
8 4246 16.2 14.0 11.8 7.24 24
9 4440 29.4 4.98 8.81 4.24 24
10 5472 35.6 11.0 14.8 7.24 16
11 5626 42.5 6.98 -1.19 -2.76 16
12 5887 40.0 9.98 17.8 2.24 16
13 6837 32.6 6.98 -14.2 -13.8 24
14 6898 41.2 15.0 -8.19 -10.8 24
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance (SF_ph_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
SF_ph_postF_factor_fix <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (SF_ph_postF_factor_fix), col = "grey" )
qqline (resid (SF_ph_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_ph_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(SF_ph_postF_factor_fix)
W = 0.68121, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_ph_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.033532 1 1.016628
Weight 1.151317 1 1.072995
Height 1.172881 1 1.082996
CalendarTime 1.314109 1 1.146346
as.factor(Intervention_months_factor) 1.329459 4 1.036238
Males:
Code
### LINEARITY (Lnearity and constant variance assumptions)
#Fitted versus Residuals Plot: Bad
plot (SF_ph_M, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_ph_M, which = 3 ) # suspect
Code
# formal test in the form of Breusch-Pagan
bptest (SF_ph_M)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_ph_M
BP = 33.655, df = 8, p-value = 4.692e-05
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_ph_M, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_ph_M)) # reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_ph_M)
W = 0.77073, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_ph_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_ph_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames SF_ph Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1657 15.1 20.0 1.81 -4.76 16
2 3057 15.1 13.0 71.8 2.24 24
3 3062 23.1 19.0 23.8 -11.8 24
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_ph), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 84 × 14
.rownames SF_ph Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 407 56.8 25.0 -3.19 3.24 16
2 437 40.6 -14.0 -3.19 12.2 16
3 440 49.4 13.0 -7.19 -3.76 16
4 581 63.1 -12.0 -7.19 1.24 16
5 635 47.5 20.0 9.81 1.24 16
6 662 40.0 24.0 16.8 1.24 24
7 755 26.9 -20.0 7.81 6.24 24
8 759 44.4 12.0 -6.19 3.24 24
9 823 58.0 16.0 46.8 7.24 24
10 825 59.3 27.0 -8.19 -9.76 24
# ℹ 74 more rows
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance (SF_ph_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
SF_ph_Mfactor_fix <- lm (SF_ph~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (SF_ph_Mfactor_fix), col = "grey" )
qqline (resid (SF_ph_Mfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_ph_Mfactor_fix))
Shapiro-Wilk normality test
data: resid(SF_ph_Mfactor_fix)
W = 0.68121, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_ph_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.092484 1 1.045220
Weight 1.274414 1 1.128900
Height 1.251347 1 1.118636
CalendarTime 1.283819 1 1.133057
as.factor(Intervention_months_factor) 1.292380 4 1.032580
We conclude that the assumptions were violated and thus we use robust models.
Robust models
Code
SF_ph_preF_robust <- lmrob (SF_ph ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_ph_postF_robust <- lmrob (SF_ph ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_ph_Mrobust <- lmrob (SF_ph ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_ph_preF_robust, SF_ph_postF_robust, SF_ph_Mrobust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Physical health (Sf-36), higher score indicates better health (robust regression)" , digits = 3 )
Physical health (Sf-36), higher score indicates better health (robust regression)
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
93.039
91.431 – 94.647
<0.001
91.354
89.119 – 93.589
<0.001
94.542
93.475 – 95.609
<0.001
Age
0.056
0.022 – 0.091
0.001
-0.053
-0.105 – 0.000
0.050
-0.026
-0.038 – -0.013
<0.001
Weight
-0.098
-0.125 – -0.071
<0.001
-0.107
-0.141 – -0.073
<0.001
-0.067
-0.084 – -0.049
<0.001
Height
0.067
0.023 – 0.111
0.003
0.036
-0.023 – 0.095
0.226
0.055
0.026 – 0.085
<0.001
CalendarTime
-0.029
-0.104 – 0.046
0.445
0.027
-0.076 – 0.129
0.612
-0.070
-0.123 – -0.017
0.009
6-12 months
-0.007
-0.729 – 0.716
0.986
-0.392
-1.433 – 0.649
0.460
-0.323
-0.876 – 0.231
0.253
12-18 months
0.309
-0.408 – 1.026
0.398
-1.107
-2.134 – -0.079
0.035
-0.461
-0.997 – 0.074
0.091
18-24 months
0.255
-0.717 – 1.226
0.607
-1.149
-2.796 – 0.498
0.171
-0.017
-0.783 – 0.749
0.965
24+ months
-0.494
-1.404 – 0.416
0.287
0.640
-0.675 – 1.955
0.340
0.215
-0.368 – 0.797
0.470
Observations
2398
1468
3582
R2 / R2 adjusted
0.033 / 0.029
0.040 / 0.035
0.034 / 0.032
Mental health
Coded using https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/SF-36-RAND-36-form.pdf
Predict by ferritin
Code
res <- cor.test (data_scale$ SF_mh, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = SF_ph, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 75 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Mental health (SF-36), higher is better" , y= "Ferritin" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= SF_mh)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Mental health (SF-36), higher is better" )
Models
Code
SF_mh_preF<- lm (SF_mh ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_mh_postF <- lm (SF_mh ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_mh_M <- lm (SF_mh ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_mh_preF, SF_mh_postF, SF_mh_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Mental health (Sf-36), higher score indicates better health" , show.reflvl = T, digits= 3 )
Mental health (Sf-36), higher score indicates better health
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
84.433
80.662 – 88.203
<0.001
79.379
75.722 – 83.037
<0.001
85.086
82.825 – 87.347
<0.001
Age
0.274
0.192 – 0.355
<0.001
0.269
0.177 – 0.361
<0.001
0.166
0.140 – 0.193
<0.001
Weight
-0.053
-0.108 – 0.002
0.060
-0.087
-0.143 – -0.030
0.003
-0.027
-0.062 – 0.008
0.130
Height
0.091
-0.010 – 0.191
0.077
0.101
-0.002 – 0.205
0.056
0.073
0.012 – 0.134
0.019
CalendarTime
-0.221
-0.400 – -0.043
0.015
-0.098
-0.273 – 0.077
0.274
-0.197
-0.308 – -0.086
0.001
6-12 months
-0.912
-2.646 – 0.822
0.302
0.732
-1.045 – 2.509
0.419
-0.237
-1.415 – 0.942
0.694
12-18 months
1.196
-0.501 – 2.893
0.167
0.286
-1.359 – 1.932
0.733
-0.663
-1.778 – 0.451
0.243
18-24 months
-1.103
-3.419 – 1.214
0.351
-0.100
-2.821 – 2.621
0.943
0.073
-1.561 – 1.707
0.930
24+ months
0.367
-1.682 – 2.417
0.725
0.835
-1.482 – 3.151
0.480
0.377
-0.926 – 1.680
0.571
Observations
2404
1476
3595
R2 / R2 adjusted
0.025 / 0.022
0.029 / 0.024
0.045 / 0.043
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance meh (heteroscedasticity)
plot (SF_mh_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (SF_mh_preF, which = 3 ) # Linearity bad, constant variance meh
Code
# formal test in the form of Breusch-Pagan
bptest (SF_mh_preF) # reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_mh_preF
BP = 26.716, df = 8, p-value = 0.0007915
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_mh_preF , 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_mh_preF)) #reject the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_mh_preF)
W = 0.82867, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_mh_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_mh_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames SF_mh Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 3925 18.2 -2.02 6.81 7.24 24
2 4065 11.2 -17.0 35.8 -10.8 24
3 7349 33.4 -13.0 49.8 -0.760 24
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_mh), almha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 6 × 14
.rownames SF_mh Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1659 15.6 -12.0 26.8 1.24 16
2 2029 11.9 -23.0 -13.2 -1.76 16
3 3925 18.2 -2.02 6.81 7.24 24
4 4065 11.2 -17.0 35.8 -10.8 24
5 4595 14.9 -16.0 6.81 1.24 16
6 6838 8.38 -20.0 -11.2 -1.76 24
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance (SF_mh_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
data_scale2 <- data_scale
table (data_scale2$ SF_mh)
3 8.375 11.25 11.875
1 1 1 1
12.5 13.75 14.875 15.25
1 1 1 1
15.625 16.25 16.875 17.125
1 1 1 2
17.25 17.5 18.25 18.375
2 1 1 1
19.25 19.5 20 20.5
2 1 1 2
20.625 21.125 22.25 22.3333333333333
1 1 1 1
22.625 23 24.125 24.25
3 1 1 1
24.5 24.625 24.75 24.875
1 1 1 1
25.5 25.625 25.75 25.875
2 1 3 1
25.9583333333333 26.25 26.375 26.5
1 2 2 2
26.75 27.125 27.25 27.625
1 2 2 2
27.75 28 28.2083333333333 28.25
2 2 1 1
28.3333333333333 28.375 28.5 28.625
1 1 2 2
29.25 29.625 29.7083333333333 29.9583333333333
1 2 1 2
30.125 30.2083333333333 30.25 30.5
2 1 1 2
30.75 31 31.125 31.25
1 2 1 2
31.5 31.625 31.875 32.125
3 1 4 3
32.25 32.3333333333333 32.375 32.5
2 1 1 1
32.75 32.875 33.125 33.375
3 1 2 2
33.5 33.5833333333333 33.625 33.7083333333333
3 1 2 1
33.75 33.875 33.9583333333333 34
4 6 1 1
34.125 34.25 34.375 34.5
1 1 5 1
34.625 34.7083333333333 34.75 34.875
2 2 1 1
35 35.125 35.2083333333333 35.25
3 4 1 2
35.3333333333333 35.375 35.5 35.625
1 2 2 1
35.75 36.0833333333333 36.125 36.2083333333333
1 1 2 1
36.25 36.375 36.625 36.75
2 1 7 1
36.875 36.9583333333333 37.0833333333333 37.125
2 1 1 1
37.4583333333333 37.5 37.625 37.875
1 1 4 1
37.9583333333333 38.25 38.375 38.4583333333333
1 1 2 4
38.5 38.5833333333333 38.625 38.75
2 1 1 1
38.8333333333333 38.875 38.9583333333333 39
1 2 1 1
39.125 39.2083333333333 39.25 39.375
1 1 1 1
39.5 39.5833333333333 39.625 39.75
2 1 2 2
39.875 40.0833333333333 40.125 40.2083333333333
1 1 1 1
40.3333333333333 40.375 40.5 40.5833333333333
2 1 1 1
40.625 40.7083333333333 40.75 40.875
1 1 3 2
41 41.125 41.1666666666667 41.2083333333333
2 3 1 1
41.25 41.4583333333333 41.5 41.5833333333333
1 1 1 1
41.7083333333333 41.75 41.8333333333333 41.9583333333333
3 2 2 1
42 42.0833333333333 42.125 42.2083333333333
1 3 3 2
42.25 42.3333333333333 42.4583333333333 42.5
1 1 3 1
42.7083333333333 42.75 42.875 42.9583333333333
1 9 4 1
43 43.125 43.25 43.3333333333333
1 2 2 1
43.375 43.5 43.625 43.7083333333333
2 2 1 1
43.75 43.9583333333333 44 44.125
4 2 3 2
44.1666666666667 44.2083333333333 44.25 44.3333333333333
1 2 1 2
44.375 44.4583333333333 44.625 44.7083333333333
2 2 1 1
44.8333333333333 45.2083333333333 45.25 45.3333333333333
1 1 2 1
45.375 45.4583333333333 45.5416666666667 45.5833333333333
4 1 1 3
45.7083333333333 45.875 46.5 46.5416666666667
2 1 2 1
46.5833333333333 46.625 46.7083333333333 46.75
3 1 2 2
46.8333333333333 46.9583333333333 47.0416666666667 47.125
3 1 1 1
47.2083333333333 47.25 47.4583333333333 47.5
1 2 2 3
47.5833333333333 47.6666666666667 47.7083333333333 47.8333333333333
1 1 1 2
47.875 47.9583333333333 48 48.125
3 1 2 1
48.2083333333333 48.25 48.2916666666667 48.3333333333333
6 1 1 1
48.4583333333333 48.5 48.5416666666667 48.5833333333333
2 5 1 1
48.8333333333333 48.9583333333333 49.0833333333333 49.125
5 2 1 2
49.2083333333333 49.25 49.375 49.4583333333333
1 2 3 4
49.5 49.625 49.7083333333333 49.875
1 2 2 1
49.9583333333333 50.0833333333333 50.1666666666667 50.2083333333333
1 3 1 1
50.2916666666667 50.3333333333333 50.375 50.4583333333333
1 1 3 1
50.5833333333333 50.625 50.7083333333333 50.7916666666667
1 1 1 2
50.8333333333333 50.875 50.9166666666667 50.9583333333333
1 1 2 2
51.0416666666667 51.0833333333333 51.2083333333333 51.25
1 1 2 3
51.3333333333333 51.375 51.4583333333333 51.7083333333333
2 1 2 4
51.75 51.7916666666667 51.8333333333333 51.9166666666667
2 4 3 1
52 52.0416666666667 52.0833333333333 52.125
1 1 2 1
52.1666666666667 52.2083333333333 52.25 52.3333333333333
1 1 1 1
52.375 52.4583333333333 52.5833333333333 52.625
1 2 3 1
52.6666666666667 52.7083333333333 52.75 52.7916666666667
1 1 2 3
52.875 52.9583333333333 53 53.0416666666667
2 1 1 4
53.0833333333333 53.1666666666667 53.2083333333333 53.25
3 1 4 1
53.3333333333333 53.4583333333333 53.5 53.5416666666667
1 2 4 1
53.5833333333333 53.7083333333333 53.75 53.7916666666667
4 1 1 1
53.8333333333333 53.875 53.9583333333333 54.125
2 1 2 1
54.1666666666667 54.2083333333333 54.2916666666667 54.3333333333333
1 2 4 1
54.4166666666667 54.4583333333333 54.5833333333333 54.625
3 3 2 1
54.6666666666667 54.75 54.7916666666667 54.8333333333333
1 1 2 3
54.875 54.9166666666667 54.9583333333333 55
2 3 1 2
55.0833333333333 55.1666666666667 55.2083333333333 55.25
3 1 1 2
55.2916666666667 55.4166666666667 55.4583333333333 55.7083333333333
2 1 2 3
55.75 55.7916666666667 55.8333333333333 55.9166666666667
1 5 2 1
56 56.0416666666667 56.0833333333333 56.1666666666667
1 1 1 2
56.2083333333333 56.25 56.2916666666667 56.3333333333333
2 1 1 1
56.375 56.4583333333333 56.5416666666667 56.5833333333333
1 3 4 3
56.625 56.6666666666667 56.7083333333333 56.75
1 1 4 1
56.8333333333333 56.875 56.9166666666667 56.9583333333333
2 1 2 3
57 57.0416666666667 57.0833333333333 57.1666666666667
1 3 2 2
57.2083333333333 57.3333333333333 57.4166666666667 57.4583333333333
2 5 2 2
57.5416666666667 57.5833333333333 57.6666666666667 57.7083333333333
3 1 1 2
57.7916666666667 57.8333333333333 57.9166666666667 57.9583333333333
2 1 1 1
58 58.0833333333333 58.125 58.1666666666667
3 3 1 5
58.2916666666667 58.3333333333333 58.375 58.4166666666667
2 2 1 1
58.4583333333333 58.5416666666667 58.5833333333333 58.6666666666667
1 2 1 1
58.7083333333333 58.75 58.7916666666667 58.8333333333333
2 1 5 5
58.875 58.9166666666667 58.9583333333333 59
4 2 2 1
59.0416666666667 59.0833333333333 59.25 59.2916666666667
1 1 1 2
59.3333333333333 59.4583333333333 59.5 59.5416666666667
1 2 1 4
59.5833333333333 59.6666666666667 59.7916666666667 59.8333333333333
2 3 4 3
59.9166666666667 59.9583333333333 60 60.0833333333333
1 1 1 2
60.1666666666667 60.2083333333333 60.2916666666667 60.375
1 1 1 2
60.4583333333333 60.5 60.5416666666667 60.5833333333333
2 4 1 1
60.6666666666667 60.75 60.7916666666667 60.8333333333333
2 1 1 2
60.875 60.9583333333333 61.0416666666667 61.0833333333333
2 1 1 2
61.125 61.1666666666667 61.2083333333333 61.25
1 3 1 1
61.2916666666667 61.3333333333333 61.4166666666667 61.4583333333333
2 1 1 3
61.5416666666667 61.5833333333333 61.6666666666667 61.7083333333333
5 2 4 2
61.7916666666667 61.875 61.9166666666667 61.9583333333333
4 3 4 2
62 62.0416666666667 62.0833333333333 62.125
3 3 2 2
62.1666666666667 62.2083333333333 62.25 62.2916666666667
3 1 1 2
62.375 62.4166666666667 62.5 62.5416666666667
4 1 2 4
62.5833333333333 62.625 62.6666666666667 62.7916666666667
2 4 3 1
62.8333333333333 62.875 62.9166666666667 63
1 2 3 1
63.0416666666667 63.0833333333333 63.1666666666667 63.2083333333333
2 3 6 1
63.25 63.375 63.4166666666667 63.4583333333333
1 3 1 1
63.5 63.5416666666667 63.5833333333333 63.625
1 1 1 3
63.6666666666667 63.75 63.7916666666667 63.875
2 3 2 1
63.9166666666667 64 64.0416666666667 64.0833333333333
3 1 4 1
64.1666666666667 64.2083333333333 64.25 64.2916666666667
4 1 4 2
64.375 64.4166666666667 64.4583333333333 64.5
2 1 1 2
64.5416666666667 64.625 64.6666666666667 64.75
3 3 4 3
64.7916666666667 64.9166666666667 64.9583333333333 65
1 4 1 2
65.0416666666667 65.0833333333333 65.125 65.1666666666667
3 1 1 5
65.2083333333333 65.25 65.2916666666667 65.3333333333333
1 2 3 2
65.375 65.4166666666667 65.4583333333333 65.5
3 6 2 5
65.5833333333333 65.625 65.6666666666667 65.75
1 8 5 3
65.7916666666667 65.8333333333333 65.875 65.9166666666667
2 2 5 5
66 66.0416666666667 66.125 66.1666666666667
4 4 1 6
66.25 66.2916666666667 66.375 66.4166666666667
5 2 2 3
66.4583333333333 66.5 66.5416666666667 66.5833333333333
1 6 1 1
66.625 66.6666666666667 66.7083333333333 66.75
2 2 3 4
66.7916666666667 66.8333333333333 66.875 66.9166666666667
3 1 4 3
66.9583333333333 67 67.0416666666667 67.125
1 4 3 4
67.1666666666667 67.25 67.2916666666667 67.375
4 1 2 3
67.4166666666667 67.5 67.5416666666667 67.5833333333333
4 3 1 1
67.625 67.6666666666667 67.75 67.7916666666667
6 4 3 1
67.8333333333333 67.875 67.9166666666667 68
2 5 2 4
68.0416666666667 68.125 68.1666666666667 68.25
3 4 3 2
68.2916666666667 68.375 68.4166666666667 68.5
6 3 6 3
68.5416666666667 68.5833333333333 68.625 68.6666666666667
3 1 3 6
68.75 68.7916666666667 68.875 68.9166666666667
5 3 2 3
69 69.0416666666667 69.125 69.1666666666667
5 2 4 1
69.25 69.2916666666667 69.375 69.4166666666667
11 5 9 2
69.5 69.5416666666667 69.5833333333333 69.625
3 4 1 7
69.6666666666667 69.75 69.7916666666667 69.875
6 5 2 7
69.9166666666667 70 70.0416666666667 70.0833333333333
2 4 1 1
70.125 70.1666666666667 70.25 70.2916666666667
5 5 5 3
70.3333333333333 70.375 70.4166666666667 70.5
1 6 4 4
70.5416666666667 70.625 70.75 70.7916666666667
3 6 8 1
70.875 70.9166666666667 71 71.0833333333333
14 1 3 1
71.125 71.1666666666667 71.25 71.375
6 1 11 13
71.4166666666667 71.5 71.5416666666667 71.625
6 8 3 7
71.6666666666667 71.75 71.7916666666667 71.875
3 8 4 7
71.9166666666667 72 72.125 72.1666666666667
3 11 7 3
72.25 72.2916666666667 72.375 72.4166666666667
17 3 9 3
72.5 72.5416666666667 72.625 72.6666666666667
8 4 7 2
72.75 72.875 72.9166666666667 73
6 12 1 9
73.0416666666667 73.125 73.1666666666667 73.25
5 16 2 7
73.375 73.4166666666667 73.5 73.625
18 7 13 9
73.6666666666667 73.75 73.7916666666667 73.875
4 14 4 9
73.9166666666667 74 74.0416666666667 74.125
4 13 2 10
74.1666666666667 74.25 74.375 74.4166666666667
3 15 20 1
74.5 74.625 74.6666666666667 74.75
12 19 5 13
74.7916666666667 74.875 74.9166666666667 75
1 10 2 19
75.0416666666667 75.125 75.1666666666667 75.25
1 8 1 17
75.375 75.4166666666667 75.5 75.625
28 2 16 16
75.6666666666667 75.75 75.7916666666667 75.875
5 19 1 20
75.9166666666667 76 76.0416666666667 76.125
6 14 5 14
76.1666666666667 76.25 76.375 76.5
1 18 15 13
76.5416666666667 76.625 76.6666666666667 76.75
1 20 1 12
76.875 76.9166666666667 77 77.125
17 3 24 10
77.1666666666667 77.25 77.2916666666667 77.375
1 19 2 24
77.4166666666667 77.5 77.625 77.75
2 16 34 20
77.875 77.9166666666667 78 78.125
31 3 26 9
78.1666666666667 78.25 78.375 78.4166666666667
5 18 14 2
78.5 78.625 78.75 78.7916666666667
29 27 33 1
78.875 78.9166666666667 79 79.0416666666667
37 2 32 1
79.125 79.1666666666667 79.25 79.375
16 1 39 17
79.4166666666667 79.5 79.5416666666667 79.625
5 43 1 22
79.6666666666667 79.75 79.875 80
1 57 46 37
80.125 80.1666666666667 80.25 80.375
35 1 31 10
80.4166666666667 80.5 80.625 80.6666666666667
3 52 10 2
80.75 80.7916666666667 80.875 81
41 1 36 55
81.125 81.25 81.375 81.4166666666667
57 36 18 1
81.5 81.625 81.75 81.875
35 10 67 14
82 82.125 82.1666666666667 82.25
95 46 1 47
82.375 82.4166666666667 82.5 82.625
38 1 40 6
82.6666666666667 82.75 82.875 83
1 71 9 127
83.125 83.25 83.375 83.4166666666667
28 76 37 1
83.5 83.625 83.75 83.875
39 16 64 5
84 84.125 84.25 84.375
129 17 152 26
84.5 84.625 84.75 84.875
36 15 43 5
85 85.125 85.1666666666667 85.25
107 3 1 195
85.375 85.5 85.625 85.75
15 119 22 37
85.875 86 86.125 86.25
5 57 2 186
86.375 86.4166666666667 86.5 86.625
3 1 233 14
86.75 86.875 87 87.125
55 8 24 2
87.25 87.375 87.5 87.625
84 1 275 3
87.75 87.875 88 88.125
136 6 33 6
88.25 88.5 88.75 88.875
39 162 238 7
89 89.125 89.25 89.5
61 4 21 64
89.75 89.875 90 90.125
201 2 110 5
90.25 90.5 90.75 91
14 21 102 161
91.125 91.25 91.375 91.5
4 48 1 4
91.75 92 92.25 92.375
38 116 89 1
92.5 92.75 93 93.25
11 2 63 88
93.5 93.625 93.75 94
29 1 1 1
94.25 94.5 94.75 95.5
80 48 1 118
95.75 96 96.5 96.75
5 1 4 18
97.5 97.75 98 98.75
1 2 2 1
99 100
1 4
Code
data_scale2$ SF_mh <- log (data_scale$ SF_mh)
table (data_scale2$ SF_mh)
1.09861228866811 2.12525107771113 2.42036812865043 2.47443534992071
1 1 1 1
2.52572864430826 2.62103882411258 2.69968195143169 2.72457950305342
1 1 1 1
2.74887219562247 2.78809290877575 2.82583323675859 2.84053938414829
1 1 1 2
2.84781214347737 2.86220088092947 2.9041650800285 2.9109910450989
2 1 1 1
2.95751106073379 2.9704144655697 2.99573227355399 3.02042488614436
2 1 1 2
3.02650393222074 3.05045717324324 3.10234200861225 3.10608033072286
1 1 1 1
3.11905548958599 3.13549421592915 3.18324864722505 3.18841661738349
3 1 1 1
3.19867311755068 3.20376218705815 3.2088254890147 3.21386328304466
1 1 1 1
3.23867845216438 3.24356843745857 3.24843462710975 3.25327725158553
2 1 3 1
3.25649268843951 3.26766598903763 3.27241659179623 3.27714473299218
1 2 2 2
3.28653447334202 3.30045581186062 3.30505352110925 3.31872115983792
1 2 2 2
3.32323584019244 3.3322045101752 3.33961744256433 3.34109345759245
2 2 1 1
3.34403896782221 3.34550847580157 3.3499040872746 3.3542804618744
1 1 2 2
3.37587957367787 3.3886185994553 3.39142759006635 3.3998075273731
1 2 1 2
3.40535539181082 3.40811782450673 3.40949618447685 3.41772668361337
2 1 1 2
3.42588999425253 3.43398720448515 3.43801135478487 3.44201937618241
1 2 1 2
3.44998754583159 3.45394794704768 3.46182200347859 3.46963454321538
3 1 4 3
3.47351804324178 3.47609868983527 3.4773865200197 3.48124008933569
2 1 1 1
3.48890296208126 3.49271249049793 3.50028828430639 3.50780711672041
3 1 2 2
3.51154543883102 3.51402991215868 3.515269837922 3.51774508671055
3 1 2 1
3.51898041731854 3.52267727919986 3.52513428289292 3.52636052461616
4 6 1 1
3.53003025350512 3.53368656470823 3.53732955598674 3.54095932403731
1 1 5 1
3.5445759645075 3.5469798118189 3.5481795720108 3.55177024014153
2 2 1 1
3.55534806148941 3.55891312765391 3.56128279700923 3.56246552925828
3 4 1 2
3.56482680544396 3.5660053559634 3.56953269648137 3.57304763858881
1 2 2 1
3.57655026914002 3.58583107821449 3.5869851464326 3.58928929491745
1 1 2 1
3.59043938130068 3.59388172549166 3.60073106733723 3.60413822565885
2 1 7 1
3.60753381465998 3.60979115196163 3.61316763237824 3.61429059712286
2 1 1 1
3.62322920412367 3.62434093297637 3.62766872306904 3.63429126382953
1 1 4 1
3.63648906691201 3.64414356027254 3.64740620590736 3.64957540415491
1 1 2 4
3.65065824129374 3.65282040429823 3.65389973521791 3.65713075579936
2 1 1 1
3.65927898433765 3.6603513704994 3.66249269894074 3.66356164612965
1 2 1 1
3.66676164886032 3.66888930923743 3.66995144422842 3.6731310971458
1 1 1 1
3.67630067190708 3.67840815424664 3.67946023219744 3.68260984110034
2 1 2 2
3.68574956110501 3.69096062031776 3.69199958145018 3.69407427099104
1 1 1 1
3.69717825692863 3.69821078154282 3.70130197411249 3.70335747329459
2 1 1 1
3.7043836406499 3.70643282169484 3.70745583968687 3.71051862921742
1 1 3 2
3.71357206670431 3.71661620908554 3.71762886739992 3.71864050127477
2 3 1 1
3.71965111278069 3.72468890681065 3.72569342723665 3.72769944596352
1 1 1 1
3.73070094896727 3.73169945129686 3.73369346990373 3.73667706237062
3 2 2 1
3.73766961828337 3.73965177948736 3.74064138867253 3.74261767390074
1 3 3 2
3.74360435380318 3.74557479779048 3.74852320287478 3.74950407593037
1 1 3 1
3.75439406122456 3.75536919538277 3.7582889054861 3.76023065366901
1 9 4 1
3.76120011569356 3.76410287535152 3.76699723337789 3.76892216178747
1 2 2 1
3.76988323826702 3.77276093809464 3.77563038052259 3.77753877804835
2 2 1 1
3.77849161280362 3.78324221556222 3.78418963391826 3.78702651525346
4 2 3 2
3.78797035675817 3.78891330826604 3.78985537145394 3.79173683955364
1 2 1 2
3.79267624779558 3.79455242095381 3.7982942400998 3.80015991228275
2 2 1 1
3.80295191037378 3.81128143562661 3.81220267014594 3.81404259706794
1 1 2 1
3.81496129258501 3.81679615548513 3.81862765782859 3.81954215263398
4 1 1 3
3.82228062992728 3.82592030637473 3.83945231259331 3.84034796872126
2 1 2 1
3.8412428233671 3.84213687796398 3.84392259272421 3.8448142557347
3 1 2 2
3.84659520010569 3.84926068369183 3.85103373380172 3.85280364576817
3 1 1 1
3.85457043068006 3.85545265393975 3.85985213309924 3.8607297110406
1 2 2 3
3.86248255986801 3.8642323415918 3.86510608564039 3.86772274653157
1 1 1 2
3.86859344750081 3.87033257837394 3.87120101090789 3.87380179260795
3 1 2 1
3.87553189684573 3.87639582778499 3.87725901299181 3.87812145375246
6 1 1 1
3.88070432217072 3.88156379794344 3.88242253565186 3.88328053656249
2 5 1 1
3.88841313978901 3.89096959623031 3.89351953386359 3.89436807018943
5 2 1 2
3.89606298584942 3.8969093676181 3.89944422322129 3.90113056426172
1 2 3 4
3.90197266957464 3.90449473900735 3.90617259174997 3.90951987521003
1 2 2 1
3.91118932467957 3.91368828474721 3.91535079552082 3.91618101557681
1 3 1 1
3.91783939074959 3.91866754814681 3.91949502026685 3.92114791320515
1 1 3 1
3.9236221412715 3.9244455254267 3.92609026263958 3.92773229913333
1 1 1 2
3.92855230737936 3.92937164376276 3.93019030938359 3.93100830533923
1 1 2 2
3.93264229263088 3.93345828614821 3.93590227921809 3.93671561801852
1 1 2 3
3.93834031374552 3.9391516728164 3.94077241871413 3.94561895485666
2 1 2 4
3.94642443214548 3.94722926116277 3.94803344295118 3.94963986899945
2 4 3 1
3.95124371858143 3.95204467977763 3.9528449999484 3.95364468011897
1 1 2 1
3.9544437213121 3.95524212454812 3.95603989084492 3.9576335166802
1 1 1 1
3.9584293782423 3.9600192036964 3.96239921275321 3.96319129200255
1 2 3 1
3.96398274435886 3.96477357081368 3.96556377235618 3.96635334997319
1 1 2 3
3.96793063736644 3.96950544084151 3.97029191355212 3.97107776820946
2 1 1 4
3.97186300578416 3.97343163355679 3.97421502568459 3.97499780458953
3 1 4 1
3.97656152656572 3.97890253426769 3.97968165390196 3.98046016698137
1 2 4 1
3.98123807444962 3.98356817259124 3.98434366700777 3.9851185604987
4 1 1 1
3.9858928539946 3.98666654842391 3.98821214378569 3.99129618632265
2 1 2 1
3.99206571310168 3.99283464816456 3.9943707467769 3.99513791213865
1 2 4 1
3.99667047948843 3.99743588327628 3.99972858584725 4.00049165341575
3 3 2 1
4.00125413915609 4.00277736869661 4.00353811426392 4.00429828153732
1 1 2 3
4.00505787139534 4.00581688471451 4.00657532236937 4.00733318523247
2 3 1 2
4.00884719006369 4.01035890614901 4.01111390807238 4.01186834039786
3 1 1 2
4.01262220398426 4.01488039086785 4.01563198804717 4.020129746754
2 1 2 3
4.02087741034023 4.02162451534323 4.02237106259701 4.02386248718368
1 5 2 1
4.02535169073515 4.02609546168799 4.02683867985673 4.02832346112431
1 1 1 2
4.02906502585981 4.02980604108453 4.03054650761225 4.03128642625496
2 1 1 1
4.03202579782284 4.03350290296586 4.03497782948692 4.0357144777707
1 3 4 3
4.0364505838032 4.03718614838215 4.03792117230352 4.03865565636151
1 1 4 1
4.04012300805546 4.04085587727111 4.04158820978279 4.042320006376
2 1 2 3
4.04305126783455 4.0437819949405 4.04451218847422 4.04597097793788
1 3 2 2
4.04669957542003 4.04888218814534 4.05033462122566 4.05106004744536
2 5 2 2
4.05250932306135 4.05323317397967 4.05467930582967 4.05540158827349
3 1 1 2
4.05684458996689 4.0575653107188 4.05900519577679 4.0597243615755
2 1 1 1
4.06044301054642 4.06187876097252 4.06259586390752 4.06331245297437
3 3 1 5
4.06545914431754 4.0661736852554 4.06688771598906 4.06760123724659
2 2 1 1
4.06831424975451 4.0697387514199 4.07045024202266 4.07187170637004
1 2 1 1
4.07258168155073 4.07329115302427 4.07400012150487 4.07470858770524
2 1 5 5
4.07541655233658 4.07612401610857 4.07683097972939 4.07753744390572
4 2 2 1
4.07824340934273 4.07894887674413 4.08176578001524 4.08246876774191
1 1 1 2
4.08317126162398 4.08527578712889 4.08597631255158 4.08667634758192
1 2 1 4
4.08737589290601 4.08877351717264 4.09086629784578 4.09156291926022
2 3 4 3
4.09295470793305 4.09364987653942 4.0943445622221 4.09573248749695
1 1 1 2
4.09711848910483 4.09781077019859 4.09919389628354 4.10057511197274
1 1 1 2
4.10195442253624 4.1026433650368 4.10333183322234 4.10401982774552
2 4 1 1
4.10539439840869 4.10676708222066 4.10745271817484 4.10813788435444
2 1 1 2
4.10882258140275 4.11019057067218 4.11155669110322 4.11223905209865
2 1 1 2
4.11292094779504 4.11360237882652 4.11428334582593 4.11496384942484
1 3 1 1
4.11564389025349 4.11632346894088 4.11768124240134 4.11835943842597
2 1 1 3
4.11971445218343 4.1203912711602 4.12174353641022 4.12241898391985
5 2 4 2
4.12376851178999 4.12511622088885 4.12578939492976 4.12646211611221
4 3 4 2
4.12713438504509 4.12780620233606 4.12847756859156 4.12914848441679
3 3 2 2
4.12981895041576 4.13048896719125 4.13115853534482 4.13182765547684
3 1 1 2
4.13316455407168 4.13383233372922 4.13516655674236 4.13583300128552
4 1 2 4
4.13649900197613 4.13716455940503 4.13782967416184 4.13982236827855
2 4 3 1
4.14048571821996 4.1411486284199 4.14181109946102 4.14313472639153
1 2 3 1
4.14379588344041 4.14445660364945 4.14577673585437 4.14643614900059
2 3 6 1
4.14709512760763 4.14906946191135 4.14972670807369 4.15038352254722
1 3 1 1
4.15103990589865 4.15169585869357 4.15235138149646 4.15300647487069
1 1 1 3
4.15366113937852 4.15496918403854 4.15562256530974 4.15692804852387
2 3 2 1
4.15758015157926 4.15888308335967 4.15953391319065 4.16018431971764
3 1 4 1
4.16148386505973 4.16213300497217 4.16278172377533 4.16343002201522
4 1 4 2
4.1647253589839 4.16537239879942 4.16601902022512 4.16666522380173
2 1 1 2
4.16731101006892 4.16860133282859 4.16924587039522 4.17053370057965
3 3 4 3
4.17117699426539 4.17310439608275 4.17374603870983 4.17438726989564
1 4 1 2
4.17502809016749 4.17566850005169 4.17630850007353 4.17694809075731
3 1 1 5
4.17758727262631 4.1782260462028 4.17886441200807 4.17950237056241
1 2 3 2
4.18013992238509 4.18077706799441 4.18141380790768 4.18205014264121
3 6 2 5
4.1833215986294 4.18395672091179 4.18459144006988 4.18585967105787
1 8 5 3
4.1864931839077 4.18712629567307 4.18775900686153 4.18839131797965
2 2 5 5
4.18965474202643 4.19028585596344 4.19154689017846 4.19217681145914
4 4 1 6
4.19343546486633 4.19406419798984 4.1953204795621 4.19594802900221
5 2 2 3
4.196575184871 4.19720194766181 4.19782831786707 4.19845429597827
1 6 1 1
4.19907988248601 4.19970507787993 4.20032988264877 4.20095429728036
2 2 3 4
4.20157832226161 4.20220195807851 4.20282520521617 4.20344806415876
3 1 4 3
4.20407053538957 4.20469261939097 4.20531431664444 4.20655655282903
1 4 3 4
4.20717709271863 4.20841701848195 4.20903640530881 4.21027402922916
4 1 2 3
4.21089226727049 4.21212759787848 4.21274469138773 4.21336140432741
4 3 1 1
4.21397773716665 4.21459369037368 4.21582445975981 4.21643927687109
6 4 3 1
4.21705371621454 4.2176677782541 4.21828146345286 4.21950770517611
2 5 2 4
4.22012026262252 4.22134425298341 4.22195568681475 4.22317743406507
3 4 3 2
4.22378774839588 4.22500726074214 4.22561645966443 4.22683374526818
6 3 6 3
4.22744183285153 4.22804955088907 4.22865689982969 4.22926388012147
3 1 3 6
4.23047673654668 4.23108261357218 4.23229326747308 4.23289804523569
5 3 2 3
4.23410650459726 4.23471018707862 4.2359164598425 4.23651905100264
5 2 4 1
4.23772314506745 4.23832464884498 4.2395265720666 4.24012699237884
11 5 9 2
4.24132675257075 4.24192609331389 4.24252507506286 4.24312369824745
3 4 1 7
4.2437219632967 4.24491742070147 4.24551461391122 4.24670793147526
6 5 2 7
4.24730405667921 4.24849524204936 4.24909030306067 4.24968501018495
2 4 1 1
4.25027936384286 4.25087336445433 4.25206030821386 4.25265325219802
5 5 5 3
4.25324584480796 4.25383808645985 4.25442997756917 4.25561270981822
1 6 4 4
4.25620355178519 4.25738418946661 4.25915253652335 4.25974129132399
3 6 8 1
4.26091776204792 4.26150547878537 4.26267987704132 4.26385289770368
14 1 3 1
4.26443889244649 4.26502454400057 4.26619481914876 4.26794766797617
6 1 11 13
4.26853126880978 4.26969744969996 4.27028003054953 4.2714441750349
6 8 3 7
4.27202573945955 4.27318785463973 4.27376840617998 4.27492849911751
3 8 4 7
4.27550804129543 4.27666611901606 4.27840072482826 4.27897825877443
3 11 7 3
4.28013232699254 4.28070886203301 4.28186093589316 4.28243647547739
17 3 9 3
4.28358656186063 4.28416110942024 4.28530921517208 4.28588277412098
8 4 7 2
4.2870289060516 4.28874564467066 4.28931723656961 4.29045944114839
6 12 1 9
4.29103005457329 4.29217030555202 4.29273994384712 4.29387824789718
5 16 2 7
4.29558327814826 4.29615097614818 4.29728540621879 4.29898464197175
18 7 13 9
4.29955041284964 4.30068099521993 4.30124580743489 4.30237447572626
4 14 4 9
4.30293833252158 4.30406509320417 4.30462799780671 4.30575285731789
4 13 2 10
4.30631481293818 4.30743777768281 4.30911986386579 4.3096799310885
3 15 20 1
4.31079912538551 4.31247557171277 4.31303376318693 4.3141492122708
12 19 5 13
4.31470647057443 4.31582005643561 4.31637638468362 4.31748811353631
1 10 2 19
4.31804351482801 4.31915339285537 4.31970787027462 4.32081590362899
1 8 1 17
4.32247565504735 4.32302829391193 4.32413265625498 4.32578691635101
28 2 16 16
4.32633772881329 4.32743844438948 4.32798834817018 4.32908724937966
5 19 1 20
4.32963624747196 4.33073334028633 4.33128143566865 4.33237672603006
6 14 5 14
4.33292392166615 4.33401741548752 4.33565541749176 4.33729074083249
1 18 15 13
4.33783525486718 4.33892339425638 4.33946702025509 4.34055338646731
1 20 1 12
4.34218072612668 4.34272258471485 4.34380542185368 4.34542748222555
17 3 24 10
4.34596758485818 4.34704691577786 4.34758614469359 4.34866373100476
1 19 2 24
4.34920208902584 4.3502779363593 4.35188954025364 4.35349855105934
2 16 34 20
4.35510497710762 4.35563987950069 4.35670882668959 4.35831010805657
31 3 26 9
4.35884329921822 4.35990882942026 4.36150499895308 4.36203648979738
5 18 14 2
4.36309862478836 4.3646897150206 4.36627827770574 4.36680723831051
29 27 33 1
4.36786432086138 4.36839244339808 4.36944785246702 4.36997513958707
37 2 32 1
4.37102888046434 4.37155533480659 4.37260741275739 4.37418345721286
16 1 39 17
4.3747082538662 4.37575702166029 4.3762809933778 4.37732811389233
5 43 1 22
4.3778512632634 4.37889674166495 4.3804629126977 4.38202663467388
1 57 46 37
4.38358791524083 4.38410780087771 4.38514676201012 4.38670318255778
35 1 31 10
4.38722145155099 4.38825718442452 4.38980877511594 4.39032543748858
3 52 10 2
4.39135796210277 4.39187382489471 4.39290475282106 4.39444915467244
41 1 36 55
4.39599117502425 4.39753082120985 4.39906810052873 4.39958000225478
57 36 18 1
4.40060302024682 4.40213558759659 4.40366580977736 4.40519369395542
35 10 67 14
4.40671924726425 4.40824247680477 4.40874970481464 4.40976338964548
95 46 1 47
4.41128199282267 4.41178768183471 4.41279829334063 4.41431229817185
38 1 40 6
4.41481645749687 4.41582401425717 4.41733344850603 4.4188406077966
1 71 9 127
4.42034549897602 4.42184812886055 4.42334850423579 4.42384812952722
28 76 37 1
4.42484663185681 4.42634251844839 4.42783617070518 4.42932759529185
39 16 64 5
4.43081679884331 4.43230378796489 4.43378856923247 4.43527114919269
129 17 152 26
4.43675153436313 4.43822973123244 4.43970574626056 4.44117958587886
36 15 43 5
4.44265125649032 4.44412076446968 4.44461012097565 4.44558811616363
107 3 1 195
4.44705331789095 4.44851637594271 4.44997729658239 4.45143608604605
15 119 22 37
4.45289275054251 4.45434729625351 4.45579972933382 4.45725005591147
5 57 2 186
4.45869828208783 4.45918055844153 4.46014441393783 4.46158845751007
3 1 233 14
4.46303041882697 4.46447030388496 4.46590811865458 4.46734386908069
55 8 24 2
4.46877756108254 4.47020920055397 4.47163879336357 4.47306634535475
84 1 275 3
4.47449186234598 4.47591535013083 4.47733681447821 4.47875626113243
136 6 33 6
4.48017369581341 4.48300255201388 4.48582342835553 4.48723088812341
39 162 238 7
4.48863636973214 4.49003987873446 4.49144142065975 4.49423862528081
61 4 21 64
4.49702802736839 4.49841981604121 4.49980967033027 4.50119759560511
201 2 110 5
4.50258359721299 4.50534985070588 4.50810847314496 4.51085950651685
14 21 102 161
4.51223219032882 4.5136029924626 4.51497191806994 4.51633897228148
4 48 1 4
4.51906748693468 4.52178857704904 4.52450228292064 4.52585637926837
38 116 89 1
4.52720864451838 4.52990770148754 4.53259949315326 4.53528405852393
11 2 63 88
4.53796143629464 4.53929744183738 4.54063166485052 4.54329478227
29 1 1 1
4.54595082632812 4.5485998344997 4.55124184396254 4.55912624748668
80 48 1 118
4.56174062806076 4.56434819146784 4.56954300834494 4.57213033190989
5 1 4 18
4.5798523780038 4.58241319886548 4.58496747867057 4.59259140378123
1 2 2 1
4.59511985013459 4.60517018598809
1 4
Code
SF_mh_preF_factor_log<- lm (log (SF_mh) ~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale,
subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
plot (SF_mh_preF_factor_log, 2 )
Code
shapiro.test (resid (SF_mh_preF_factor_log))
Shapiro-Wilk normality test
data: resid(SF_mh_preF_factor_log)
W = 0.70988, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_mh_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.071519 1 1.035142
Weight 1.207559 1 1.098890
Height 1.141302 1 1.068318
CalendarTime 1.419709 1 1.191515
as.factor(Intervention_months_factor) 1.417997 4 1.044623
Postmenopausal females:
Code
#### FEMALE post
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance looks bad (heteroscedasticity)
plot (SF_mh_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_mh_postF, which = 3 ) # Linearity bad, constant variance bad
Code
# formal test in the form of Breusch-Pagan
bptest (SF_mh_postF) # reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_mh_postF
BP = 22.99, df = 8, p-value = 0.003377
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_mh_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_mh_postF)) #reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_mh_postF)
W = 0.80763, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_mh_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_mh_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames SF_mh Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 3296 13.8 11.0 19.8 14.2 24
2 5278 29.7 5.98 36.8 4.24 16
3 6670 22.6 3.98 1.81 -9.76 24
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_mh), almha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 9 × 14
.rownames SF_mh Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 514 27.1 7.98 -6.19 6.24 16
2 1423 25.8 4.98 -13.2 -6.76 16
3 1438 19.2 8.98 11.8 -5.76 16
4 1689 27.2 3.98 -4.19 -1.76 16
5 3296 13.8 11.0 19.8 14.2 24
6 4753 28.2 4.98 8.81 -8.76 16
7 5040 38.8 28.0 -11.2 -4.76 16
8 5844 31.5 11.0 -12.2 -17.8 16
9 6670 22.6 3.98 1.81 -9.76 24
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance (SF_mh_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
Code
# log transform to pass normality.
SF_mh_postF_factor_fix <- lm (log1p (SF_mh) ~ log1p (Age) + log1p (Weight) + log1p (Height) + Intervention_months_factor,
data = data_scale,
subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
qqnorm (resid (SF_mh_postF_factor_fix), col = "grey" )
qqline (resid (SF_mh_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_mh_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(SF_mh_postF_factor_fix)
W = 0.65258, p-value = 1.188e-15
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_mh_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.035241 1 1.017468
Weight 1.147876 1 1.071390
Height 1.172249 1 1.082704
CalendarTime 1.314283 1 1.146422
as.factor(Intervention_months_factor) 1.328800 4 1.036173
Males:
Code
### MALE
### LINEARITY (Lnearity and constant variance assumptions)
#Fitted versus Residuals Plot: Good
plot (SF_mh_M, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_mh_M, which = 3 ) # suspect
Code
# formal test in the form of Breusch-Pagan
bptest (SF_mh_M)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_mh_M
BP = 24.379, df = 8, p-value = 0.001979
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_mh_M, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_mh_M)) # reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_mh_M)
W = 0.78863, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_mh_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_mh_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames SF_mh Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 897 31.9 -9.02 66.8 23.2 16
2 3057 35.1 13.0 71.8 2.24 24
3 4049 19.5 -22.0 56.8 5.24 24
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_mh), almha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 89 × 14
.rownames SF_mh Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 423 39.6 -9.02 -18.2 3.24 16
2 500 39.2 -4.02 -2.19 18.2 16
3 501 42.5 -14.0 1.81 3.24 16
4 581 38.5 -12.0 -7.19 1.24 16
5 662 31 24.0 16.8 1.24 24
6 759 38.2 12.0 -6.19 3.24 24
7 856 42.8 7.98 -6.19 3.24 24
8 897 31.9 -9.02 66.8 23.2 16
9 959 37.6 -5.02 13.8 8.24 24
10 974 41.2 15.0 17.8 7.24 24
# ℹ 79 more rows
# ℹ 8 more variables: `as.factor(Intervention_months_factor)` <fct>,
# .fitted <dbl>, .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
# .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance (SF_mh_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
SF_mh_Mfactor_fix <- lm (SF_mh~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (SF_mh_Mfactor_fix), col = "grey" )
qqline (resid (SF_mh_Mfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_mh_Mfactor_fix))
Shapiro-Wilk normality test
data: resid(SF_mh_Mfactor_fix)
W = 0.68271, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_mh_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.093076 1 1.045503
Weight 1.274621 1 1.128991
Height 1.252068 1 1.118959
CalendarTime 1.284247 1 1.133246
as.factor(Intervention_months_factor) 1.292244 4 1.032567
We conclude that the assumptions were violated and thus we use robust models.
Robust models
Code
SF_mh_preF_robust <- lmrob (SF_mh ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_mh_postF_robust <- lmrob (SF_mh ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_mh_Mrobust <- lmrob (SF_mh ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_mh_preF_robust, SF_mh_postF_robust, SF_mh_Mrobust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Robust: Mental health (Sf-36), higher score indicates better health" , show.reflvl = T)
Robust: Mental health (Sf-36), higher score indicates better health
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
86.16
84.04 – 88.28
<0.001
83.79
81.61 – 85.98
<0.001
87.31
86.00 – 88.61
<0.001
Age
0.14
0.09 – 0.18
<0.001
0.13
0.07 – 0.18
<0.001
0.11
0.09 – 0.13
<0.001
Weight
-0.00
-0.04 – 0.03
0.896
-0.05
-0.08 – -0.01
0.015
-0.02
-0.04 – 0.00
0.117
Height
0.02
-0.05 – 0.08
0.605
0.05
-0.01 – 0.12
0.108
0.05
0.01 – 0.09
0.009
CalendarTime
-0.14
-0.25 – -0.04
0.006
-0.06
-0.17 – 0.05
0.288
-0.14
-0.20 – -0.07
<0.001
6-12 months
-1.06
-2.07 – -0.04
0.041
-0.12
-1.19 – 0.94
0.819
0.29
-0.37 – 0.95
0.385
12-18 months
-0.10
-1.07 – 0.87
0.836
0.29
-0.75 – 1.32
0.589
-0.46
-1.13 – 0.21
0.180
18-24 months
-0.53
-2.23 – 1.16
0.537
-1.06
-2.95 – 0.82
0.267
0.51
-0.49 – 1.51
0.314
24+ months
-0.24
-1.52 – 1.04
0.717
0.38
-0.96 – 1.71
0.581
0.34
-0.43 – 1.10
0.388
Observations
2404
1476
3595
R2 / R2 adjusted
0.022 / 0.019
0.023 / 0.018
0.063 / 0.061
Warm glow
Warm glow is defined by a score of >= 6 on average on warm glow questions (3 questions).
Predict by ferritin
Code
# Barplot
data_scale_per2 <- data_scale %>%
group_by (Fergroup) %>%
count (Warmglow) %>%
mutate (
Percentage = round (proportions (n) * 100 , 1 ),
res = str_c (n, "(" , Percentage, ")%" )
)
ggplot (subset (data_scale_per2, ! is.na (Warmglow)), aes (Fergroup, Percentage, fill = Warmglow)) +
geom_col (position = "dodge" ) +
geom_text (aes (label = res), vjust = - 0.2 )+
theme_bw ()
Code
ggplot (data_scale , mapping = aes (x = Fergroup, fill = Warmglow)) +
geom_bar (position = "dodge" ) +
theme_bw ()
Code
# t-test + boxplot
res <- t.test (Ferritin ~ Warmglow, data = data_scale, var.equal = TRUE )
res
Two Sample t-test
data: Ferritin by Warmglow
t = -0.48007, df = 6602, p-value = 0.6312
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
-2.511109 1.523153
sample estimates:
mean in group No mean in group Yes
37.39408 37.88806
Code
ggplot (subset (data_scale, ! is.na (Warmglow)), aes (x= Warmglow, y= Ferritin)) +
geom_boxplot () +
theme_bw () +
geom_text (x = 2.3 , y = 700 , label= paste ('Means:' , " " , means$ Ferritin[1 ], " " , means$ Ferritin[2 ], " \n " , 't(' ,res$ parameter, '), p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Warm glow" , y= "Ferritin" )
Models
Code
WG_preF <- glm (Warmglow ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 ,family = binomial)
WG_postF <- glm (Warmglow ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 ,family = binomial)
WG_M <- glm (Warmglow ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor , data = data_scale, subset = data_scale$ Gender == "Male" ,family = binomial)
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (WG_preF, WG_postF, WG_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Warmglow, yes/no" , show.reflvl = T)
Warmglow, yes/no
Pre-menopausal women
Post-menopausal women
Men
Predictors
Odds Ratios
CI
p
Odds Ratios
CI
p
Odds Ratios
CI
p
Intercept
2.16
1.26 – 3.70
0.005
1.61
0.83 – 3.13
0.155
3.48
2.31 – 5.24
<0.001
Age
1.03
1.02 – 1.04
<0.001
1.01
1.00 – 1.03
0.107
1.02
1.02 – 1.03
<0.001
Weight
1.00
0.99 – 1.01
0.966
0.99
0.98 – 1.00
0.266
1.01
1.00 – 1.01
0.033
Height
1.00
0.99 – 1.02
0.604
0.99
0.97 – 1.01
0.320
1.00
0.99 – 1.01
0.800
CalendarTime
0.98
0.96 – 1.01
0.205
1.00
0.97 – 1.03
0.944
0.96
0.94 – 0.98
<0.001
Intervention_months_factor1
1.01
0.79 – 1.29
0.956
0.86
0.63 – 1.19
0.372
0.88
0.71 – 1.09
0.237
Intervention_months_factor2
1.40
1.10 – 1.78
0.006
0.80
0.60 – 1.07
0.134
0.98
0.80 – 1.20
0.821
Intervention_months_factor3
1.12
0.80 – 1.57
0.513
0.43
0.27 – 0.70
0.001
0.88
0.66 – 1.18
0.382
Intervention_months_factor4
1.16
0.86 – 1.56
0.337
0.91
0.60 – 1.39
0.656
0.99
0.78 – 1.25
0.939
Observations
2167
1403
3386
R2 Tjur
0.016
0.014
0.035
Assumptions
Premenopausal females:
Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot (WG_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (WG_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 14
.rownames Warmglow Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 275 No -6.02 49.8 5.24 16
2 4031 No 0.985 36.8 1.24 24
3 5873 Yes -24.0 48.8 -4.76 16
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
cooksd = cooks.distance (WG_preF)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# infleunce plot
car:: influencePlot (WG_preF,col= "red" ,id.n= 3 )
StudRes Hat CookD
275 -1.485627 0.012589442 0.002836555
386 -1.572073 0.004375231 0.001188061
405 -1.574393 0.004220537 0.001151878
5873 1.274258 0.018607018 0.002631894
7349 -1.182457 0.015587865 0.001780371
Code
car:: influenceIndexPlot (WG_preF, id.n= 3 )
Code
#multicollinearity
car:: vif (WG_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.074778 1 1.036715
Weight 1.206945 1 1.098610
Height 1.139151 1 1.067310
CalendarTime 1.429483 1 1.195610
Intervention_months_factor 1.421997 4 1.044991
Postmenopausal females:
Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot (WG_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (WG_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 14
.rownames Warmglow Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 1032 Yes 2.98 57.8 -2.76 24
2 2675 No 11.0 55.8 1.24 16
3 5303 No 25.0 38.8 -7.76 16
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Cook's distance
cooksd = cooks.distance (WG_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# infleunce plot
car:: influencePlot (WG_postF,col= "red" ,id.n= 3 )
StudRes Hat CookD
1032 1.1741073 0.030669720 0.003489540
2675 -1.3096454 0.024417000 0.003759449
4404 0.7179887 0.044936680 0.001562035
5494 -1.6532263 0.005193491 0.001686551
6518 -1.6401320 0.005243758 0.001654482
Code
car:: influenceIndexPlot (WG_postF, id.n= 3 )
Code
#multicollinearity
car:: vif (WG_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.036394 1 1.018034
Weight 1.159445 1 1.076775
Height 1.180832 1 1.086661
CalendarTime 1.334970 1 1.155409
Intervention_months_factor 1.357413 4 1.038936
Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot (WG_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (WG_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 14
.rownames Warmglow Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 897 No -9.02 66.8 23.2 16
2 1294 No -9.02 -3.19 -56.8 24
3 1510 No 25.0 41.8 -6.76 16
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = Warmglow), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # see zero influential data points
# A tibble: 0 × 14
# ℹ 14 variables: .rownames <chr>, Warmglow <fct>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, CalendarTime <dbl>,
# Intervention_months_factor <fct>, .fitted <dbl>, .resid <dbl>, .hat <dbl>,
# .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance (WG_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# infleunce plot
car:: influencePlot (WG_M,col= "red" ,id.n= 3 )
StudRes Hat CookD
897 -1.560565 0.008758757 0.0023221783
1294 -1.152003 0.033447136 0.0036251881
1510 -1.780492 0.005387099 0.0023169988
4049 1.165328 0.011614189 0.0012691814
6533 -1.764812 0.001483218 0.0006169958
Code
car:: influenceIndexPlot (WG_M, id.n= 3 )
Code
#multicollinearity
car:: vif (WG_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.088254 1 1.043194
Weight 1.263102 1 1.123878
Height 1.249595 1 1.117853
CalendarTime 1.279070 1 1.130960
Intervention_months_factor 1.285683 4 1.031910
Fatigue
Predict by ferritin
Code
res <- cor.test (data_scale$ CIS_Totalmean, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = CIS_Totalmean, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 120 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Fatigue, higher is worse" , y= "Ferritine" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= CIS_Totalmean)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Fatigue, higher is worse" )
Models
Code
CIS_preF <- lm (CIS_Totalmean ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CIS_postF <- lm (CIS_Totalmean ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
CIS_M <- lm (CIS_Totalmean ~ Age + Weight + Height + CalendarTime+ Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (CIS_preF, CIS_postF, CIS_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Fatigue, higher score is worse, show.reflvl = T, digits = 3" )
Fatigue, higher score is worse, show.reflvl = T, digits = 3
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
84.25
82.64 – 85.87
<0.001
84.02
82.04 – 86.01
<0.001
85.52
84.34 – 86.69
<0.001
Age
0.09
0.05 – 0.12
<0.001
0.07
0.02 – 0.12
0.004
0.09
0.08 – 0.10
<0.001
Weight
0.01
-0.01 – 0.03
0.446
0.02
-0.01 – 0.05
0.206
0.02
0.00 – 0.04
0.039
Height
-0.02
-0.07 – 0.02
0.266
-0.07
-0.12 – -0.01
0.019
-0.00
-0.03 – 0.03
0.853
CalendarTime
-0.04
-0.12 – 0.04
0.293
0.03
-0.06 – 0.13
0.505
-0.02
-0.08 – 0.04
0.543
Intervention_months_factor1
-0.27
-1.01 – 0.47
0.479
-0.23
-1.19 – 0.74
0.646
-0.11
-0.72 – 0.50
0.730
Intervention_months_factor2
0.63
-0.10 – 1.36
0.090
0.19
-0.70 – 1.09
0.669
-0.64
-1.21 – -0.06
0.031
Intervention_months_factor3
-0.31
-1.30 – 0.68
0.539
-0.02
-1.51 – 1.46
0.974
-0.78
-1.63 – 0.07
0.071
Intervention_months_factor4
-0.36
-1.24 – 0.52
0.422
-0.50
-1.76 – 0.76
0.435
-0.30
-0.98 – 0.38
0.385
Observations
2360
1471
3558
R2 / R2 adjusted
0.017 / 0.013
0.012 / 0.007
0.054 / 0.052
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity good, constant variance (heteroscedasticity)
plot (CIS_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (CIS_preF, which = 3 ) # Linearity good, constant variance
Code
# formal test in the form of Breusch-Pagan
bptest (CIS_preF) # The constant variance assumption is not violated.
studentized Breusch-Pagan test
data: CIS_preF
BP = 12.73, df = 8, p-value = 0.1215
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CIS_preF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CIS_preF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CIS_preF)
W = 0.98294, p-value = 3.238e-16
Code
### OUTLIERS
#plot 3largest
plot (CIS_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CIS_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 3740 122 -8.02 -2.19 -6.76 24
2 6127 60 -15.0 31.8 -0.760 24
3 7695 107 -23.0 -9.19 -6.76 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CIS_Totalmean), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 15 × 14
.rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 3006 61 -13.0 -17.2 -6.76 24
2 3213 64 -2.02 8.81 -13.8 24
3 3514 61 -17.0 -15.2 -11.8 24
4 3740 122 -8.02 -2.19 -6.76 24
5 4126 58 -10.0 1.81 1.24 24
6 5337 64 -3.02 -18.2 -11.8 16
7 5578 62 -17.0 -9.19 -3.76 24
8 5674 101 -23.0 8.81 2.24 16
9 6127 60 -15.0 31.8 -0.760 24
10 6282 104 -14.0 -18.2 -1.76 24
11 6357 61 -24.0 -17.2 -7.76 16
12 6859 55 -16.0 -18.2 -8.76 24
13 7026 58 -6.02 -13.2 -8.76 24
14 7239 59 -24.0 -24.2 -11.8 24
15 7695 107 -23.0 -9.19 -6.76 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Cook's distance
cooksd = cooks.distance (CIS_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# check log transform to pass normality.
data_scale2 <- data_scale
table (data_scale2$ CIS_Totalmean)
33 50 55 58
1 1 1 2
59 60 61 62
3 1 4 5
63 63.1578947368421 64 65
1 1 10 10
65.4545454545455 65.5555555555556 66 67
1 1 20 10
67.2727272727273 67.3684210526316 68 68.4210526315789
1 1 33 2
69 69.4736842105263 70 70.5263157894737
47 1 30 2
71 71.5789473684211 72 72.6315789473684
60 5 70 2
73 73.6842105263158 74 74.1176470588235
85 10 114 1
74.7368421052632 75 75.2941176470588 75.7894736842105
6 140 1 5
76 76.8421052631579 77 77.7777777777778
162 7 186 1
77.8947368421053 78 78.8235294117647 78.8888888888889
14 238 1 1
78.9473684210526 79 80 81
11 246 301 342
81.0526315789474 81.1111111111111 81.1764705882353 81.25
9 1 1 1
82 82.1052631578947 82.2222222222222 83
362 27 1 467
83.1578947368421 83.3333333333333 83.5294117647059 84
40 1 1 489
84.2105263157895 84.4444444444444 84.7058823529412 85
25 2 1 525
85.2631578947368 86 86.3157894736842 86.6666666666667
30 773 20 5
87 87.3684210526316 87.5 88
542 27 1 517
88.421052631579 88.8888888888889 89 89.4736842105263
12 3 398 17
90 90.5263157894737 90.5882352941177 91
299 22 1 249
91.1111111111111 91.578947368421 92 92.2222222222222
2 6 217 1
92.6315789473684 93 93.3333333333333 93.6842105263158
9 152 2 9
94 94.1176470588235 94.7368421052632 95
97 1 6 96
95.5555555555556 95.7894736842105 96 96.6666666666667
1 1 64 1
96.8421052631579 97 97.8947368421053 98
7 35 6 38
98.8888888888889 98.9473684210526 99 100
1 2 29 8
101 101.052631578947 102 102.105263157895
16 3 6 1
103 104 104.210526315789 105
7 4 1 1
106 107 108 108.421052631579
4 3 3 2
110 110.526315789474 112.631578947368 114
2 1 1 3
115 116 116.842105263158 117
2 4 1 2
119 121 122 123
1 1 1 1
125 128 128.888888888889 131
1 2 1 1
140
1
Code
data_scale2$ CIS_Totalmean <- log10 (data_scale$ CIS_Totalmean)
table (data_scale2$ CIS_Totalmean)
1.51851393987789 1.69897000433602 1.74036268949424 1.76342799356294
1 1 1 2
1.77085201164214 1.77815125038364 1.78532983501077 1.79239168949825
3 1 4 5
1.79934054945358 1.8004276450948 1.80617997398389 1.81291335664286
1 1 10 10
1.81593981127304 1.81660950220282 1.81954393554187 1.82607480270083
1 1 20 10
1.82783903457275 1.82845636869504 1.83250891270624 1.83518975135401
1 1 33 2
1.83884909073726 1.84182033025302 1.84509804001426 1.84835119741198
47 1 30 2
1.85125834871908 1.85478530741739 1.85733249643127 1.86112548544841
60 5 70 2
1.86332286012046 1.86737443472541 1.86923171973098 1.86992162373929
85 10 114 1
1.87353474343023 1.8750612633917 1.87676104826959 1.87960889114242
6 140 1 5
1.88081359228079 1.88559925483161 1.88649072517248 1.89085553057493
162 7 186 1
1.89150811444213 1.89209460269048 1.89665587698653 1.89701583927975
14 238 1 1
1.89733765810285 1.89762709129044 1.90308998699194 1.90848501887865
11 246 301 342
1.90876711988363 1.90908035068113 1.90943016502296 1.90982336965091
9 1 1 1
1.91381385238372 1.91437099740163 1.91498921029165 1.91907809237607
362 27 1 467
1.91990348600159 1.92081875395238 1.92183942300478 1.92427928606188
40 1 1 489
1.9253663817031 1.92657108284147 1.92791357071698 1.92941892571429
25 2 1 525
1.9307614135898 1.93449845124357 1.93609024709487 1.93785209325116
30 773 20 5
1.93951925261862 1.94135448708723 1.94200805302231 1.94448267215017
542 27 1 517
1.94655568077303 1.94884747755262 1.94939000664491 1.95169532042545
12 3 398 17
1.95424250943932 1.95677484595472 1.95707179945819 1.95904139232109
299 22 1 249
1.95957134294439 1.96179564732977 1.96378782734556 1.96483558293675
2 6 217 1
1.96675906686132 1.96848294855394 1.97003677662256 1.97166640135606
9 152 2 9
1.9731278535997 1.97367106127765 1.97651890415048 1.97772360528885
97 1 6 96
1.98025594180424 1.98131778703225 1.98227123303957 1.98527674317929
1 1 64 1
1.98606422205671 1.98677173426624 1.99075934326509 1.99122607569249
7 35 6 38
1.99514749720559 1.99540424831085 1.99563519459755 2
1 2 29 8
2.00432137378264 2.00454762775072 2.00860017176192 2.0090481289774
16 3 6 1
2.01283722470517 2.01703333929878 2.0179115893087 2.02118929906994
7 4 1 1
2.02530586526477 2.02938377768521 2.03342375548695 2.03511361941632
4 3 3 2
2.04139268515822 2.04346569378109 2.05166017239636 2.05690485133647
2 1 1 3
2.06069784035361 2.06445798922692 2.06759937349781 2.06818586174616
2 4 1 2
2.07554696139253 2.08278537031645 2.08635983067475 2.0899051114394
1 1 1 1
2.09691001300806 2.10720996964787 2.11021547978759 2.11727129565576
1 2 1 1
2.14612803567824
1
Code
#analaysis with log transformed
CIS_preF_factor_fix <- lm (CIS_Totalmean~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
qqnorm (resid (CIS_preF_factor_fix), col = "grey" )
qqline (resid (CIS_preF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CIS_preF_factor_fix))
Shapiro-Wilk normality test
data: resid(CIS_preF_factor_fix)
W = 0.97017, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CIS_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.071632 1 1.035197
Weight 1.204900 1 1.097679
Height 1.137995 1 1.066768
CalendarTime 1.423279 1 1.193013
Intervention_months_factor 1.420789 4 1.044880
Postmenopausal females: *
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity ok, constant variance looks ok(heteroscedasticity)
plot (CIS_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CIS_postF, which = 3 ) # constant variance ok
Code
# formal test in the form of Breusch-Pagan
bptest (CIS_postF) # we fail to reject the null of homoscedasticity. The constant variance assumption is not violated.
studentized Breusch-Pagan test
data: CIS_postF
BP = 11.638, df = 8, p-value = 0.1681
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CIS_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CIS_postF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CIS_postF)
W = 0.9401, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (CIS_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CIS_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1208 140 12.0 6.81 -13.8 16
2 6761 123 9.98 -2.19 -13.8 24
3 7896 128 6.98 -18.2 -4.76 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CIS_Totalmean), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 8 × 14
.rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1208 140 12.0 6.81 -13.8 16
2 1606 116 11.0 -0.189 -10.8 24
3 1881 116 19.0 4.81 -8.76 16
4 2120 59 9.98 -13.2 -6.76 16
5 6229 115 4.98 -18.2 -16.8 24
6 6761 123 9.98 -2.19 -13.8 24
7 7035 116 22.0 -13.2 1.24 24
8 7896 128 6.98 -18.2 -4.76 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Cook's distance
cooksd = cooks.distance (CIS_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
#analaysis with log transformed
CIS_postF_factor_fix <- lm (CIS_Totalmean~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (CIS_postF_factor_fix), col = "grey" )
qqline (resid (CIS_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CIS_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(CIS_postF_factor_fix)
W = 0.95649, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CIS_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.035520 1 1.017605
Weight 1.149492 1 1.072144
Height 1.173248 1 1.083166
CalendarTime 1.314824 1 1.146658
Intervention_months_factor 1.330228 4 1.036313
Males:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Good
plot (CIS_M, which = 1 ) # Linearity ok, constant variance looks ok(heteroscedasticity)
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CIS_M, which = 3 ) # Linearity ok, constant variance looks ok(heteroscedasticity)
Code
# formal test in the form of Breusch-Pagan
bptest (CIS_M)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: CIS_M
BP = 17.574, df = 8, p-value = 0.02466
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CIS_M, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CIS_M)) #reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CIS_M)
W = 0.95921, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (CIS_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CIS_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 14
.rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1006 128 26.0 35.8 3.24 24
2 1603 125 12.0 6.81 17.2 24
3 5561 33 29.0 -7.19 -5.76 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CIS_Totalmean), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 12 × 14
.rownames CIS_Totalmean Age[,1] Weight[,1] Height[,1] CalendarTime
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 697 129. 12.0 14.8 11.2 24
2 1006 128 26.0 35.8 3.24 24
3 1089 116 22.0 19.8 9.24 24
4 1603 125 12.0 6.81 17.2 24
5 1807 117 16.0 -8.19 -8.76 16
6 2028 114 26.0 -9.19 -8.76 16
7 3288 119 22.0 24.8 6.24 24
8 4216 121 14.0 0.811 5.24 24
9 4969 117 2.98 29.8 1.24 16
10 5561 33 29.0 -7.19 -5.76 24
11 7370 50 0.985 11.8 8.24 24
12 8028 114 0.985 12.8 -6.76 24
# ℹ 8 more variables: Intervention_months_factor <fct>, .fitted <dbl>,
# .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>,
# index <int>
Code
#Cook's distance
cooksd = cooks.distance (CIS_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
#analaysis with log transformed
CISMfactor_fix <- lm (CIS_Totalmean~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (CISMfactor_fix), col = "grey" )
qqline (resid (CISMfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CISMfactor_fix))
Shapiro-Wilk normality test
data: resid(CISMfactor_fix)
W = 0.95649, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CIS_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.092269 1 1.045117
Weight 1.272654 1 1.128119
Height 1.251368 1 1.118646
CalendarTime 1.283325 1 1.132839
Intervention_months_factor 1.291711 4 1.032513
::: {.callout-note appearance=“default”} ### Decision
We conclude that the assumptions were violated and thus we use robust models. :::
Robust models
Code
CIS_preF_robust <- lmrob (CIS_Totalmean ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CIS_postF_robust <- lmrob (CIS_Totalmean ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
CIS_M_robust <- lmrob (CIS_Totalmean ~ Age + Weight + Height + CalendarTime+ as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (CIS_preF_robust, CIS_postF_robust, CIS_M_robust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Fatigue, higher score is worse (robust regression)" , show.reflvl = T, digits = 3 )
Fatigue, higher score is worse (robust regression)
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
84.363
82.789 – 85.938
<0.001
83.993
82.281 – 85.705
<0.001
85.510
84.481 – 86.538
<0.001
Age
0.089
0.054 – 0.123
<0.001
0.073
0.029 – 0.117
0.001
0.080
0.067 – 0.092
<0.001
Weight
0.005
-0.019 – 0.029
0.696
0.014
-0.011 – 0.039
0.288
0.012
-0.007 – 0.031
0.221
Height
-0.023
-0.066 – 0.021
0.314
-0.051
-0.100 – -0.002
0.042
0.001
-0.028 – 0.029
0.947
CalendarTime
-0.033
-0.108 – 0.042
0.391
0.043
-0.040 – 0.127
0.305
-0.010
-0.062 – 0.041
0.694
6-12 months
-0.286
-1.013 – 0.441
0.440
-0.371
-1.187 – 0.444
0.372
-0.387
-0.914 – 0.140
0.150
12-18 months
0.688
-0.051 – 1.427
0.068
0.340
-0.431 – 1.110
0.387
-0.501
-1.014 – 0.012
0.056
18-24 months
-0.687
-1.663 – 0.289
0.167
-0.381
-1.613 – 0.852
0.545
-0.736
-1.599 – 0.127
0.095
24+ months
-0.210
-1.093 – 0.672
0.640
-0.718
-1.852 – 0.415
0.214
-0.221
-0.828 – 0.386
0.474
Observations
2360
1471
3558
R2 / R2 adjusted
0.019 / 0.016
0.016 / 0.011
0.055 / 0.053